From a76066c6a31f2297bfd1ee7373b7ef5072684d5d Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 28 Jun 2018 13:48:34 +0000 Subject: [PATCH 0001/1185] Add int16 support for Pack on GPU This fix tries to add int16 support for Pack on GPU, so that the issue raised in 20370 could be addressed. This fix is related to 20370. Signed-off-by: Yong Tang --- tensorflow/core/kernels/pack_op.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/pack_op.cc b/tensorflow/core/kernels/pack_op.cc index 5645275cfa..18ed1ea26a 100644 --- a/tensorflow/core/kernels/pack_op.cc +++ b/tensorflow/core/kernels/pack_op.cc @@ -158,7 +158,8 @@ REGISTER_PACK(string); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_bfloat16(REGISTER_GPU); TF_CALL_int64(REGISTER_GPU); -REGISTER_GPU(bool); +TF_CALL_int16(REGISTER_GPU); +TF_CALL_bool(REGISTER_GPU); #undef REGISTER_GPU // A special GPU kernel for int32. -- GitLab From 81fefe40e1c3ad9a14d9d7d665b25d7e93fb2dfc Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 28 Jun 2018 13:49:34 +0000 Subject: [PATCH 0002/1185] Add test case for int16 support of tf.stack/Pack on gpu Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/stack_op_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/stack_op_test.py b/tensorflow/python/kernel_tests/stack_op_test.py index 2f27d1839b..eadbcabfd1 100644 --- a/tensorflow/python/kernel_tests/stack_op_test.py +++ b/tensorflow/python/kernel_tests/stack_op_test.py @@ -76,7 +76,7 @@ class StackOpTest(test.TestCase): np.random.seed(7) with self.test_session(use_gpu=True): for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): - for dtype in [np.bool, np.float32, np.int32, np.int64]: + for dtype in [np.bool, np.float32, np.int16, np.int32, np.int64]: data = np.random.randn(*shape).astype(dtype) # Stack back into a single tensorflow tensor directly using np array c = array_ops.stack(data) -- GitLab From 
6d96cc4d05fd09e5663853a795bcd9a5b01f1732 Mon Sep 17 00:00:00 2001 From: hehongliang Date: Sat, 14 Jul 2018 23:58:23 +0000 Subject: [PATCH 0003/1185] Register gradient function for DepthwiseConv2dNativeBackpropInput and DepthwiseConv2dNativeBackpropFilter --- tensorflow/python/ops/nn_grad.py | 51 +++++++++++++++++ tensorflow/python/ops/nn_grad_test.py | 81 +++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 3a41391340..252447bcac 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -85,6 +85,57 @@ def _Conv2DBackpropFilterGrad(op, grad): data_format=op.get_attr("data_format")) ] +@ops.RegisterGradient("DepthwiseConv2dNativeBackpropInput") +def _DepthwiseConv2dNativeBackpropInputGrad(op, grad): + """The derivatives for deconvolution. + + Args: + op: the Deconvolution op. + grad: the tensor representing the gradient w.r.t. the output + + Returns: + the gradients w.r.t. 
the input and the filter + """ + return [ + None, + nn_ops.depthwise_conv2d_native_backprop_filter( + grad, + array_ops.shape(op.inputs[1]), + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")), + nn_ops.depthwise_conv2d_native( + grad, + op.inputs[1], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")) + ] + + +@ops.RegisterGradient("DepthwiseConv2dNativeBackpropFilter") +def _DepthwiseConv2dNativeBackpropFilterGrad(op, grad): + return [ + nn_ops.depthwise_conv2d_native_backprop_input( + array_ops.shape(op.inputs[0]), + grad, + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")), None, + nn_ops.depthwise_conv2d_native( + op.inputs[0], + grad, + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")) + ] + @ops.RegisterGradient("Conv3D") def _Conv3DGrad(op, grad): diff --git a/tensorflow/python/ops/nn_grad_test.py b/tensorflow/python/ops/nn_grad_test.py index 49d54beb20..729c042858 100644 --- a/tensorflow/python/ops/nn_grad_test.py +++ b/tensorflow/python/ops/nn_grad_test.py @@ -26,6 +26,8 @@ from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn_grad # pylint: disable=unused-import from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import nn_impl +from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -47,5 +49,84 @@ class Relu6OpTest(test.TestCase): self.assertLess(error, 1e-4) +class Conv2dOpTest(test.TestCase): + + def run_test(self, x, y): + with self.test_session(): + error = gradient_checker.compute_gradient_error( + x, + 
x.get_shape().as_list(), + y, + y.get_shape().as_list()) + self.assertLess(error, 1e-3) + + + def testConv2dGradWRTInput(self): + input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + y = nn_ops.conv2d(input, filter, [1,1,1,1], "SAME") + self.run_test(input, y) + + def testConv2dGradWRTFilter(self): + input = constant_op.constant([0.5], dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = array_ops.placeholder(dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + y = nn_ops.conv2d(input, filter, [1,1,1,1], "SAME") + self.run_test(filter, y) + + def testConv2dBackpropFilterGrad(self): + input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + strides = [1,1,1,1] + padding = "SAME" + out = nn_impl.depthwise_conv2d(input, filter, strides, padding) + + grad_wrt_input = gradients_impl.gradients(out, input)[0] + self.run_test(filter, grad_wrt_input) + + grad_wrt_filter = gradients_impl.gradients(out, filter)[0] + self.run_test(input, grad_wrt_filter) + + +class DepthwiseConv2dTest(test.TestCase): + + def run_test(self, x, y): + with self.test_session(): + error = gradient_checker.compute_gradient_error( + x, + x.get_shape().as_list(), + y, + y.get_shape().as_list()) + self.assertLess(error, 1e-3) + + def testDepthwiseConv2dGradWRTInput(self): + input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + strides = [1,1,1,1] + padding = "SAME" + y = nn_impl.depthwise_conv2d(input, filter, strides, padding) + self.run_test(input, y) + + def testDepthwiseConv2dGradWRTFilter(self): + input = constant_op.constant([0.5], dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = 
array_ops.placeholder(dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + strides = [1,1,1,1] + padding = "SAME" + y = nn_impl.depthwise_conv2d(input, filter, strides, padding) + self.run_test(filter, y) + + def testDepthwiseConv2dBackpropFilterGrad(self): + input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + strides = [1,1,1,1] + padding = "SAME" + out = nn_impl.depthwise_conv2d(input, filter, strides, padding) + + grad_wrt_input = gradients_impl.gradients(out, input)[0] + self.run_test(filter, grad_wrt_input) + + grad_wrt_filter = gradients_impl.gradients(out, filter)[0] + self.run_test(input, grad_wrt_filter) + + if __name__ == "__main__": test.main() -- GitLab From 4062d414975e3637983738a449efb536d226793e Mon Sep 17 00:00:00 2001 From: hehongliang Date: Tue, 17 Jul 2018 06:29:28 +0000 Subject: [PATCH 0004/1185] modify code by google python style --- tensorflow/python/ops/nn_grad.py | 72 ++++++------ tensorflow/python/ops/nn_grad_test.py | 153 ++++++++++++++------------ 2 files changed, 118 insertions(+), 107 deletions(-) mode change 100644 => 100755 tensorflow/python/ops/nn_grad.py mode change 100644 => 100755 tensorflow/python/ops/nn_grad_test.py diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py old mode 100644 new mode 100755 index 252447bcac..77283d2488 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -87,7 +87,7 @@ def _Conv2DBackpropFilterGrad(op, grad): @ops.RegisterGradient("DepthwiseConv2dNativeBackpropInput") def _DepthwiseConv2dNativeBackpropInputGrad(op, grad): - """The derivatives for deconvolution. + """The derivatives for deconvolution. Args: op: the Deconvolution op. @@ -96,45 +96,45 @@ def _DepthwiseConv2dNativeBackpropInputGrad(op, grad): Returns: the gradients w.r.t. 
the input and the filter """ - return [ - None, - nn_ops.depthwise_conv2d_native_backprop_filter( - grad, - array_ops.shape(op.inputs[1]), - op.inputs[2], - dilations=op.get_attr("dilations"), - strides=op.get_attr("strides"), - padding=op.get_attr("padding"), - data_format=op.get_attr("data_format")), - nn_ops.depthwise_conv2d_native( - grad, - op.inputs[1], - dilations=op.get_attr("dilations"), - strides=op.get_attr("strides"), - padding=op.get_attr("padding"), - data_format=op.get_attr("data_format")) - ] + return [ + None, + nn_ops.depthwise_conv2d_native_backprop_filter( + grad, + array_ops.shape(op.inputs[1]), + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")), + nn_ops.depthwise_conv2d_native( + grad, + op.inputs[1], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")) + ] @ops.RegisterGradient("DepthwiseConv2dNativeBackpropFilter") def _DepthwiseConv2dNativeBackpropFilterGrad(op, grad): - return [ - nn_ops.depthwise_conv2d_native_backprop_input( - array_ops.shape(op.inputs[0]), - grad, - op.inputs[2], - dilations=op.get_attr("dilations"), - strides=op.get_attr("strides"), - padding=op.get_attr("padding"), - data_format=op.get_attr("data_format")), None, - nn_ops.depthwise_conv2d_native( - op.inputs[0], - grad, - dilations=op.get_attr("dilations"), - strides=op.get_attr("strides"), - padding=op.get_attr("padding"), - data_format=op.get_attr("data_format")) - ] + return [ + nn_ops.depthwise_conv2d_native_backprop_input( + array_ops.shape(op.inputs[0]), + grad, + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")), None, + nn_ops.depthwise_conv2d_native( + op.inputs[0], + grad, + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), 
+ padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")) + ] @ops.RegisterGradient("Conv3D") diff --git a/tensorflow/python/ops/nn_grad_test.py b/tensorflow/python/ops/nn_grad_test.py old mode 100644 new mode 100755 index 729c042858..b3df06fcd2 --- a/tensorflow/python/ops/nn_grad_test.py +++ b/tensorflow/python/ops/nn_grad_test.py @@ -51,81 +51,92 @@ class Relu6OpTest(test.TestCase): class Conv2dOpTest(test.TestCase): - def run_test(self, x, y): - with self.test_session(): - error = gradient_checker.compute_gradient_error( - x, - x.get_shape().as_list(), - y, - y.get_shape().as_list()) - self.assertLess(error, 1e-3) - - - def testConv2dGradWRTInput(self): - input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - y = nn_ops.conv2d(input, filter, [1,1,1,1], "SAME") - self.run_test(input, y) - - def testConv2dGradWRTFilter(self): - input = constant_op.constant([0.5], dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = array_ops.placeholder(dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - y = nn_ops.conv2d(input, filter, [1,1,1,1], "SAME") - self.run_test(filter, y) - - def testConv2dBackpropFilterGrad(self): - input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - strides = [1,1,1,1] - padding = "SAME" - out = nn_impl.depthwise_conv2d(input, filter, strides, padding) - - grad_wrt_input = gradients_impl.gradients(out, input)[0] - self.run_test(filter, grad_wrt_input) - - grad_wrt_filter = gradients_impl.gradients(out, filter)[0] - self.run_test(input, grad_wrt_filter) + def run_test(self, x, y): + with self.test_session(): + error = gradient_checker.compute_gradient_error( + x, + x.get_shape().as_list(), + y, + y.get_shape().as_list()) + self.assertLess(error, 1e-3) + + def 
testConv2dGradWRTInput(self): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + y = nn_ops.conv2d(x, f, [1, 1, 1, 1], "SAME") + self.run_test(x, y) + + def testConv2dGradWRTFilter(self): + x = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = array_ops.placeholder( + dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + y = nn_ops.conv2d(x, f, [1, 1, 1, 1], "SAME") + self.run_test(f, y) + + def testConv2dBackpropFilterGrad(self): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + strides = [1, 1, 1, 1] + padding = "SAME" + out = nn_impl.depthwise_conv2d(x, f, strides, padding) + + grad_wrt_input = gradients_impl.gradients(out, x)[0] + self.run_test(f, grad_wrt_input) + + grad_wrt_filter = gradients_impl.gradients(out, f)[0] + self.run_test(x, grad_wrt_filter) class DepthwiseConv2dTest(test.TestCase): - def run_test(self, x, y): - with self.test_session(): - error = gradient_checker.compute_gradient_error( - x, - x.get_shape().as_list(), - y, - y.get_shape().as_list()) - self.assertLess(error, 1e-3) - - def testDepthwiseConv2dGradWRTInput(self): - input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - strides = [1,1,1,1] - padding = "SAME" - y = nn_impl.depthwise_conv2d(input, filter, strides, padding) - self.run_test(input, y) - - def testDepthwiseConv2dGradWRTFilter(self): - input = constant_op.constant([0.5], dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = array_ops.placeholder(dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - strides = [1,1,1,1] - padding = "SAME" - y = nn_impl.depthwise_conv2d(input, 
filter, strides, padding) - self.run_test(filter, y) - - def testDepthwiseConv2dBackpropFilterGrad(self): - input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - strides = [1,1,1,1] - padding = "SAME" - out = nn_impl.depthwise_conv2d(input, filter, strides, padding) - - grad_wrt_input = gradients_impl.gradients(out, input)[0] - self.run_test(filter, grad_wrt_input) - - grad_wrt_filter = gradients_impl.gradients(out, filter)[0] - self.run_test(input, grad_wrt_filter) + def run_test(self, x, y): + with self.test_session(): + error = gradient_checker.compute_gradient_error( + x, + x.get_shape().as_list(), + y, + y.get_shape().as_list()) + self.assertLess(error, 1e-3) + + def testDepthwiseConv2dGradWRTInput(self): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + strides = [1, 1, 1, 1] + padding = "SAME" + y = nn_impl.depthwise_conv2d(x, f, strides, padding) + self.run_test(x, y) + + def testDepthwiseConv2dGradWRTFilter(self): + x = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = array_ops.placeholder( + dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + strides = [1, 1, 1, 1] + padding = "SAME" + y = nn_impl.depthwise_conv2d(x, f, strides, padding) + self.run_test(f, y) + + def testDepthwiseConv2dBackpropFilterGrad(self): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + strides = [1, 1, 1, 1] + padding = "SAME" + out = nn_impl.depthwise_conv2d(x, f, strides, padding) + + grad_wrt_input = gradients_impl.gradients(out, x)[0] + self.run_test(f, grad_wrt_input) + + grad_wrt_filter = gradients_impl.gradients(out, f)[0] + 
self.run_test(x, grad_wrt_filter) if __name__ == "__main__": -- GitLab From 0ade4eb41fea75de3eaf94075bcfa8009c3b2c4b Mon Sep 17 00:00:00 2001 From: hehongliang Date: Fri, 27 Jul 2018 02:23:15 +0000 Subject: [PATCH 0005/1185] change file permission back to 100644 --- tensorflow/python/ops/nn_grad.py | 0 tensorflow/python/ops/nn_grad_test.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tensorflow/python/ops/nn_grad.py mode change 100755 => 100644 tensorflow/python/ops/nn_grad_test.py diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py old mode 100755 new mode 100644 diff --git a/tensorflow/python/ops/nn_grad_test.py b/tensorflow/python/ops/nn_grad_test.py old mode 100755 new mode 100644 -- GitLab From 6655013cf6734dbd8dfc685cd2086fcc032dd04e Mon Sep 17 00:00:00 2001 From: cclauss Date: Sun, 19 Aug 2018 13:42:27 +0200 Subject: [PATCH 0006/1185] ci_build: Upgrade the Python 'six' compatibility module --- tensorflow/tools/ci_build/install/install_pip_packages.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index bb316ecfc9..4943cf0ae1 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -31,8 +31,8 @@ pip2 install virtualenv pip3 install virtualenv # Install six. -pip2 install --upgrade six==1.10.0 -pip3 install --upgrade six==1.10.0 +pip2 install --upgrade six==1.11.0 +pip3 install --upgrade six==1.11.0 # Install absl-py. 
pip2 install --upgrade absl-py -- GitLab From c7c5f17a4671c51f0f5706b461d70b8573a1659d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 25 Aug 2018 10:58:02 +0800 Subject: [PATCH 0007/1185] TST: add test TensorShape for add_variable --- .../python/keras/engine/topology_test.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensorflow/python/keras/engine/topology_test.py b/tensorflow/python/keras/engine/topology_test.py index 079c8dae71..fabcb8b055 100644 --- a/tensorflow/python/keras/engine/topology_test.py +++ b/tensorflow/python/keras/engine/topology_test.py @@ -42,6 +42,34 @@ except ImportError: class TopologyConstructionTest(test.TestCase): + def test_add_variable_supports_TensorShape(self): + + class MyLayer(keras.layers.Layer): + + def build(self, input_shape): + self.a = self.add_variable( + 'a', + tensor_shape.TensorShape([1, 2]), + 'float32') + self.b = self.add_variable( + 'b', + tensor_shape.TensorShape([1, 4]), + 'int32') + self.built = True + + def call(self, inputs): + return inputs + + x1 = input_layer_lib.Input(shape=(1,)) + # Github issue #21838: + # Won't raise exception here when constructing. 
+ layer = MyLayer() + _ = layer.apply(x1) + self.assertEqual(layer.a.get_shape(), + tensor_shape.TensorShape([1, 2])) + self.assertEqual(layer.b.get_shape(), + tensor_shape.TensorShape([1, 4])) + def test_get_updates(self): class MyLayer(keras.layers.Layer): -- GitLab From 24effed4c02c73958c7b3f535f31b09b97b85f4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 25 Aug 2018 10:58:29 +0800 Subject: [PATCH 0008/1185] TST: add test TensorShape for initializer --- tensorflow/python/ops/init_ops_test.py | 203 ++++++++++++++----------- 1 file changed, 111 insertions(+), 92 deletions(-) diff --git a/tensorflow/python/ops/init_ops_test.py b/tensorflow/python/ops/init_ops_test.py index 6a1fe17119..96e4258bdb 100644 --- a/tensorflow/python/ops/init_ops_test.py +++ b/tensorflow/python/ops/init_ops_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape as tensor_shape_lib from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test @@ -54,142 +55,160 @@ class InitializersTest(test.TestCase): self.assertGreater(lim, abs(output.min() - target_min)) def test_uniform(self): - tensor_shape = (9, 6, 7) + shape = [9, 6, 7] with self.cached_session(): - self._runner( - init_ops.RandomUniform(minval=-1, maxval=1, seed=124), - tensor_shape, - target_mean=0., - target_max=1, - target_min=-1) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.RandomUniform(minval=-1, maxval=1, seed=124), + tensor_shape, + target_mean=0., + target_max=1, + target_min=-1) def test_normal(self): - tensor_shape = (8, 12, 99) + shape = (8, 12, 99) with self.cached_session(): - self._runner( - init_ops.RandomNormal(mean=0, stddev=1, seed=153), - tensor_shape, - target_mean=0., - target_std=1) + for 
tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.RandomNormal(mean=0, stddev=1, seed=153), + tensor_shape, + target_mean=0., + target_std=1) def test_truncated_normal(self): - tensor_shape = (12, 99, 7) + shape = (12, 99, 7) with self.cached_session(): - self._runner( - init_ops.TruncatedNormal(mean=0, stddev=1, seed=126), - tensor_shape, - target_mean=0., - target_max=2, - target_min=-2) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.TruncatedNormal(mean=0, stddev=1, seed=126), + tensor_shape, + target_mean=0., + target_max=2, + target_min=-2) def test_constant(self): - tensor_shape = (5, 6, 4) + shape = (5, 6, 4) with self.cached_session(): - self._runner( - init_ops.Constant(2), - tensor_shape, - target_mean=2, - target_max=2, - target_min=2) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.Constant(2), + tensor_shape, + target_mean=2, + target_max=2, + target_min=2) def test_lecun_uniform(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) - self._runner( - init_ops.lecun_uniform(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, _ = init_ops._compute_fans(tensor_shape) + std = np.sqrt(1. / fan_in) + self._runner( + init_ops.lecun_uniform(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_glorot_uniform_initializer(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, fan_out = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. 
/ (fan_in + fan_out)) - self._runner( - init_ops.glorot_uniform_initializer(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, fan_out = init_ops._compute_fans(tensor_shape) + std = np.sqrt(2. / (fan_in + fan_out)) + self._runner( + init_ops.glorot_uniform_initializer(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_he_uniform(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) - self._runner( - init_ops.he_uniform(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, _ = init_ops._compute_fans(tensor_shape) + std = np.sqrt(2. / fan_in) + self._runner( + init_ops.he_uniform(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_lecun_normal(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) - self._runner( - init_ops.lecun_normal(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, _ = init_ops._compute_fans(tensor_shape) + std = np.sqrt(1. / fan_in) + self._runner( + init_ops.lecun_normal(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_glorot_normal_initializer(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, fan_out = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / (fan_in + fan_out)) - self._runner( - init_ops.glorot_normal_initializer(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, fan_out = init_ops._compute_fans(tensor_shape) + std = np.sqrt(2. 
/ (fan_in + fan_out)) + self._runner( + init_ops.glorot_normal_initializer(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_he_normal(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) - self._runner( - init_ops.he_normal(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, _ = init_ops._compute_fans(tensor_shape) + std = np.sqrt(2. / fan_in) + self._runner( + init_ops.he_normal(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_Orthogonal(self): - tensor_shape = (20, 20) + shape = (20, 20) with self.cached_session(): - self._runner(init_ops.Orthogonal(seed=123), tensor_shape, target_mean=0.) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner(init_ops.Orthogonal(seed=123), + tensor_shape, + target_mean=0.) def test_Identity(self): with self.cached_session(): - tensor_shape = (3, 4, 5) - with self.assertRaises(ValueError): + shape = (3, 4, 5) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + with self.assertRaises(ValueError): + self._runner( + init_ops.Identity(), + tensor_shape, + target_mean=1. / int(tensor_shape[0]), + target_max=1.) + + shape = (3, 3) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: self._runner( init_ops.Identity(), tensor_shape, - target_mean=1. / tensor_shape[0], + target_mean=1. / int(tensor_shape[0]), target_max=1.) - tensor_shape = (3, 3) - self._runner( - init_ops.Identity(), - tensor_shape, - target_mean=1. / tensor_shape[0], - target_max=1.) - def test_Zeros(self): - tensor_shape = (4, 5) + shape = (4, 5) with self.cached_session(): - self._runner( - init_ops.Zeros(), tensor_shape, target_mean=0., target_max=0.) 
+ for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.Zeros(), tensor_shape, target_mean=0., target_max=0.) def test_Ones(self): - tensor_shape = (4, 5) + shape = (4, 5) with self.cached_session(): - self._runner(init_ops.Ones(), tensor_shape, target_mean=1., target_max=1.) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner(init_ops.Ones(), tensor_shape, + target_mean=1., target_max=1.) if __name__ == '__main__': -- GitLab From bd81c8b19e04e0d5f3f28ca73b7f7e2f1b11fdca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 25 Aug 2018 10:59:00 +0800 Subject: [PATCH 0009/1185] BUG: initializer should support TensorShape --- tensorflow/python/ops/init_ops.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 4d75ee3974..3aaac615fb 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -38,6 +38,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops_impl from tensorflow.python.ops import gen_linalg_ops @@ -539,7 +540,8 @@ class Orthogonal(Initializer): num_rows = 1 for dim in shape[:-1]: num_rows *= dim - num_cols = shape[-1] + num_rows = int(num_rows) + num_cols = int(shape[-1]) flat_shape = (num_cols, num_rows) if num_rows < num_cols else (num_rows, num_cols) @@ -1107,6 +1109,8 @@ class Identity(Initializer): "Identity matrix initializer can only be used for 2D matrices.") if dtype is None: dtype = self.dtype + if isinstance(full_shape, tensor_shape.TensorShape): + full_shape = full_shape.as_list() initializer = linalg_ops_impl.eye(*full_shape, dtype=dtype) if partition_info is not None: initializer = 
array_ops.slice(initializer, partition_info.var_offset, @@ -1287,7 +1291,7 @@ def _compute_fans(shape): shape: Integer shape tuple or TF tensor shape. Returns: - A tuple of scalars (fan_in, fan_out). + A tuple of integer scalars (fan_in, fan_out). """ if len(shape) < 1: # Just to avoid errors for constants. fan_in = fan_out = 1 @@ -1299,12 +1303,12 @@ def _compute_fans(shape): else: # Assuming convolution kernels (2D, 3D, or more). # kernel shape: (..., input_depth, depth) - receptive_field_size = 1. + receptive_field_size = 1 for dim in shape[:-2]: receptive_field_size *= dim fan_in = shape[-2] * receptive_field_size fan_out = shape[-1] * receptive_field_size - return fan_in, fan_out + return int(fan_in), int(fan_out) def _assert_float_dtype(dtype): -- GitLab From 5ec7c8c199368cf950cb31113c6820d872f45de5 Mon Sep 17 00:00:00 2001 From: frreiss Date: Thu, 18 Oct 2018 19:50:42 -0700 Subject: [PATCH 0010/1185] Document undocumented ops Fix missing carriage return --- tensorflow/python/ops/check_ops.py | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py index 40b111ea0c..b2511f1bfe 100644 --- a/tensorflow/python/ops/check_ops.py +++ b/tensorflow/python/ops/check_ops.py @@ -1136,6 +1136,25 @@ def _get_diff_for_monotonic_comparison(x): v1=['debugging.is_numeric_tensor', 'is_numeric_tensor']) @deprecation.deprecated_endpoints('is_numeric_tensor') def is_numeric_tensor(tensor): + """Returns `True` if the elements of `tensor` are numbers. + + Specifically, returns `True` if the dtype of `tensor` is one of the following: + + * `tf.float32` + * `tf.float64` + * `tf.int8` + * `tf.int16` + * `tf.int32` + * `tf.int64` + * `tf.uint8` + * `tf.qint8` + * `tf.qint32` + * `tf.quint8` + * `tf.complex64` + + Returns `False` if `tensor` is of a non-numeric type or if `tensor` is not + a `tf.Tensor` object. 
+ """ return isinstance(tensor, ops.Tensor) and tensor.dtype in NUMERIC_TYPES @@ -1283,6 +1302,18 @@ def assert_same_float_dtype(tensors=None, dtype=None): 'debugging.assert_scalar', v1=['debugging.assert_scalar', 'assert_scalar']) @deprecation.deprecated_endpoints('assert_scalar') def assert_scalar(tensor, name=None): + """Statically checks whether a tensor is zero-dimensional. + + Args: + tensor: Value to test. + name: A name for this operation (optional). Defaults to "assert_scalar". + + Raises: + ValueError: If `tensor`'s shape has more than zero dimensions. + + Returns: + The input `tensor`, possibly converted to a `tf.Tensor` + """ with ops.name_scope(name, 'assert_scalar', [tensor]) as name_scope: tensor = ops.convert_to_tensor(tensor, name=name_scope) shape = tensor.get_shape() -- GitLab From c8a0c22e3f94728af57ceaed2f53603d1d3d87a9 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 28 Jun 2018 15:10:41 +0000 Subject: [PATCH 0011/1185] Add int16 for ConcatGPU Signed-off-by: Yong Tang --- tensorflow/core/kernels/concat_lib_gpu.cc | 1 + tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tensorflow/core/kernels/concat_lib_gpu.cc b/tensorflow/core/kernels/concat_lib_gpu.cc index 93e392d303..278ef2b2e8 100644 --- a/tensorflow/core/kernels/concat_lib_gpu.cc +++ b/tensorflow/core/kernels/concat_lib_gpu.cc @@ -116,6 +116,7 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER); TF_CALL_complex64(REGISTER); TF_CALL_complex128(REGISTER); TF_CALL_int64(REGISTER); +TF_CALL_int16(REGISTER); TF_CALL_bfloat16(REGISTER); TF_CALL_bool(REGISTER); TF_CALL_uint8(REGISTER); diff --git a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc index a561d918bd..752e0ed3b7 100644 --- a/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc +++ b/tensorflow/core/kernels/concat_lib_gpu_impl.cu.cc @@ -202,6 +202,7 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPUCONCAT32); TF_CALL_complex64(REGISTER_GPUCONCAT32); 
TF_CALL_complex128(REGISTER_GPUCONCAT32); TF_CALL_int64(REGISTER_GPUCONCAT32); +TF_CALL_int16(REGISTER_GPUCONCAT32); TF_CALL_uint8(REGISTER_GPUCONCAT32); REGISTER_GPUCONCAT32(bfloat16); REGISTER_GPUCONCAT32(bool); @@ -210,6 +211,7 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPUCONCAT64); TF_CALL_complex64(REGISTER_GPUCONCAT64); TF_CALL_complex128(REGISTER_GPUCONCAT64); TF_CALL_int64(REGISTER_GPUCONCAT64); +TF_CALL_int16(REGISTER_GPUCONCAT64); TF_CALL_uint8(REGISTER_GPUCONCAT64); REGISTER_GPUCONCAT64(bfloat16); REGISTER_GPUCONCAT64(bool); @@ -218,6 +220,7 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU32); TF_CALL_complex64(REGISTER_GPU32); TF_CALL_complex128(REGISTER_GPU32); TF_CALL_int64(REGISTER_GPU32); +TF_CALL_int16(REGISTER_GPU32); TF_CALL_uint8(REGISTER_GPU32); REGISTER_GPU32(bfloat16); REGISTER_GPU32(bool); @@ -226,6 +229,7 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU64); TF_CALL_complex64(REGISTER_GPU64); TF_CALL_complex128(REGISTER_GPU64); TF_CALL_int64(REGISTER_GPU64); +TF_CALL_int16(REGISTER_GPU64); TF_CALL_uint8(REGISTER_GPU64); REGISTER_GPU64(bfloat16); REGISTER_GPU64(bool); -- GitLab From 898bb2ad207d2b4cee188ce8b868c76bda8f0ad2 Mon Sep 17 00:00:00 2001 From: zhaoyongke Date: Sun, 18 Nov 2018 01:00:26 +0800 Subject: [PATCH 0012/1185] Take channel_multiplier into considerations. Thanks to smillius, drpngx, HuiyangFei --- .../tools/graph_transforms/fold_batch_norms.cc | 13 ++++++++++--- .../tools/graph_transforms/fold_old_batch_norms.cc | 12 ++++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tensorflow/tools/graph_transforms/fold_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_batch_norms.cc index cb4230dd82..2d7bb65430 100644 --- a/tensorflow/tools/graph_transforms/fold_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_batch_norms.cc @@ -73,9 +73,16 @@ Status FoldBatchNorms(const GraphDef& input_graph_def, // Make sure all the inputs really are vectors, with as many entries as // there are columns in the weights. 
- const int weights_cols_index = conv_node.op() == "Conv2D" ? 3 : \ - (conv_node.op() == "DepthwiseConv2dNative" ? 2 : 1); - const int64 weights_cols = weights.shape().dim_size(weights_cols_index); + int64 weights_cols; + if (conv_node.op() == "Conv2D") { + weights_cols = weights.shape().dim_size(3); + } + else if (conv_node.op() == "DepthwiseConv2dNative") { + weights_cols = weights.shape().dim_size(2) * weights.shape().dim_size(3); + } + else { + weights_cols = weights.shape().dim_size(1); + } if ((mul_values.shape().dims() != 1) || (mul_values.shape().dim_size(0) != weights_cols)) { return errors::InvalidArgument( diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index 1a4b141d0e..413361b616 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -116,8 +116,16 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, CHECK_EQ("Const", weights_node.op()); Tensor weights = GetNodeTensorAttr(weights_node, "value"); - const int weights_cols_idx = conv_node.op() == "Conv2D" ? 3 : 2; - const int64 weights_cols = weights.shape().dim_size(weights_cols_idx); + int64 weights_cols; + if (conv_node.op() == "Conv2D") { + weights_cols = weights.shape().dim_size(3); + } + else if (conv_node.op() == "DepthwiseConv2dNative") { + weights_cols = weights.shape().dim_size(2) * weights.shape().dim_size(3); + } + else { + weights_cols = weights.shape().dim_size(1); + } CHECK_EQ(weights_cols, scale_values.size()); // Multiply the original weights by the scale vector. 
-- GitLab From c1d9c390847471dc233a4a91725f98fdb861fd5a Mon Sep 17 00:00:00 2001 From: zhaoyongke Date: Sun, 18 Nov 2018 01:46:07 +0800 Subject: [PATCH 0013/1185] Add test cases for depthwise conv --- .../graph_transforms/fold_batch_norms_test.cc | 54 ++++++ .../fold_old_batch_norms_test.cc | 164 ++++++++++++++++++ 2 files changed, 218 insertions(+) diff --git a/tensorflow/tools/graph_transforms/fold_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_batch_norms_test.cc index a5d541feb6..2b5326799e 100644 --- a/tensorflow/tools/graph_transforms/fold_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_batch_norms_test.cc @@ -87,6 +87,57 @@ class FoldBatchNormsTest : public ::testing::Test { } } + void TestFoldBatchNormsDepthwiseConv2dNative() { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + Tensor input_data(DT_FLOAT, TensorShape({1, 1, 6, 2})); + test::FillValues( + &input_data, {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f, -1.0f, -4.0f, -2.0f, + -5.0f, -3.0f, -6.0f}); + Output input_op = + Const(root.WithOpName("input_op"), Input::Initializer(input_data)); + + Tensor weights_data(DT_FLOAT, TensorShape({1, 2, 2, 2})); + test::FillValues(&weights_data, + {1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.2f, 0.3f, 0.4f}); + Output weights_op = + Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); + + Output conv_op = DepthwiseConv2dNative(root.WithOpName("conv_op"), input_op, weights_op, + {1, 1, 1, 1}, "VALID"); + + Tensor mul_values_data(DT_FLOAT, TensorShape({4})); + test::FillValues(&mul_values_data, {2.0f, 3.0f, 4.0f, 5.0f}); + Output mul_values_op = Const(root.WithOpName("mul_values"), + Input::Initializer(mul_values_data)); + + Output mul_op = Mul(root.WithOpName("output"), conv_op, mul_values_op); + + GraphDef original_graph_def; + TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); + + std::unique_ptr original_session(NewSession(SessionOptions())); + 
TF_ASSERT_OK(original_session->Create(original_graph_def)); + std::vector original_outputs; + TF_ASSERT_OK(original_session->Run({}, {"output"}, {}, &original_outputs)); + + GraphDef fused_graph_def; + TF_ASSERT_OK( + FoldBatchNorms(original_graph_def, {{}, {"output"}}, &fused_graph_def)); + + std::unique_ptr fused_session(NewSession(SessionOptions())); + TF_ASSERT_OK(fused_session->Create(fused_graph_def)); + std::vector fused_outputs; + TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs)); + + test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 1e-5); + + for (const NodeDef& node : fused_graph_def.node()) { + EXPECT_NE("Mul", node.op()); + } + } + void TestFoldBatchNormsConv2DShared() { auto root = tensorflow::Scope::NewRootScope(); using namespace ::tensorflow::ops; // NOLINT(build/namespaces) @@ -202,6 +253,9 @@ TEST_F(FoldBatchNormsTest, TestFoldBatchNormsConv2D) { TEST_F(FoldBatchNormsTest, TestFoldBatchNormsMatMul) { TestFoldBatchNormsMatMul(); } +TEST_F(FoldBatchNormsTest, TestFoldBatchNormsDepthwiseConv2dNative) { + TestFoldBatchNormsDepthwiseConv2dNative(); +} } // namespace graph_transforms } // namespace tensorflow diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index 435f46c107..45637cf9d1 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -121,6 +121,85 @@ class FoldOldBatchNormsTest : public ::testing::Test { } } + void TestFoldOldBatchNormsAfterDepthwiseConv2dNative() { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + Tensor input_data(DT_FLOAT, TensorShape({1, 1, 6, 2})); + test::FillValues( + &input_data, {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f, -1.0f, -4.0f, -2.0f, + -5.0f, -3.0f, -6.0f}); + Output input_op = + Const(root.WithOpName("input_op"), 
Input::Initializer(input_data)); + + Tensor weights_data(DT_FLOAT, TensorShape({1, 2, 2, 2})); + test::FillValues(&weights_data, + {1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.2f, 0.3f, 0.4f}); + Output weights_op = + Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); + + Output conv_op = DepthwiseConv2dNative(root.WithOpName("conv_op"), + input_op, weights_op, {1, 1, 1, 1}, "VALID"); + + Tensor mean_data(DT_FLOAT, TensorShape({4})); + test::FillValues(&mean_data, {10.0f, 20.0f, 30.0f, 40.0f}); + Output mean_op = + Const(root.WithOpName("mean_op"), Input::Initializer(mean_data)); + + Tensor variance_data(DT_FLOAT, TensorShape({4})); + test::FillValues(&variance_data, {0.25f, 0.5f, 0.75f, 1.0f}); + Output variance_op = Const(root.WithOpName("variance_op"), + Input::Initializer(variance_data)); + + Tensor beta_data(DT_FLOAT, TensorShape({4})); + test::FillValues(&beta_data, {0.1f, 0.6f, 1.1f, 1.6f}); + Output beta_op = + Const(root.WithOpName("beta_op"), Input::Initializer(beta_data)); + + Tensor gamma_data(DT_FLOAT, TensorShape({4})); + test::FillValues(&gamma_data, {1.0f, 2.0f, 3.0f, 4.0f}); + Output gamma_op = + Const(root.WithOpName("gamma_op"), Input::Initializer(gamma_data)); + + GraphDef original_graph_def; + TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); + + + NodeDef batch_norm_node; + batch_norm_node.set_op("BatchNormWithGlobalNormalization"); + batch_norm_node.set_name("output"); + AddNodeInput("conv_op", &batch_norm_node); + AddNodeInput("mean_op", &batch_norm_node); + AddNodeInput("variance_op", &batch_norm_node); + AddNodeInput("beta_op", &batch_norm_node); + AddNodeInput("gamma_op", &batch_norm_node); + SetNodeAttr("T", DT_FLOAT, &batch_norm_node); + SetNodeAttr("variance_epsilon", 0.00001f, &batch_norm_node); + SetNodeAttr("scale_after_normalization", false, &batch_norm_node); + *(original_graph_def.mutable_node()->Add()) = batch_norm_node; + original_graph_def.mutable_versions()->set_producer(8); + + std::unique_ptr 
original_session(NewSession(SessionOptions())); + TF_ASSERT_OK(original_session->Create(original_graph_def)); + std::vector original_outputs; + TF_ASSERT_OK(original_session->Run({}, {"output"}, {}, &original_outputs)); + + GraphDef fused_graph_def; + TF_ASSERT_OK(FoldOldBatchNorms(original_graph_def, {{}, {"output"}}, + &fused_graph_def)); + + std::unique_ptr fused_session(NewSession(SessionOptions())); + TF_ASSERT_OK(fused_session->Create(fused_graph_def)); + std::vector fused_outputs; + TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs)); + + test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 1e-5); + + for (const NodeDef& node : fused_graph_def.node()) { + EXPECT_NE("BatchNormWithGlobalNormalization", node.op()); + } + } + void TestFoldFusedBatchNorms() { auto root = tensorflow::Scope::NewRootScope(); using namespace ::tensorflow::ops; // NOLINT(build/namespaces) @@ -198,6 +277,83 @@ class FoldOldBatchNormsTest : public ::testing::Test { } } + void TestFoldFusedBatchNormsAfterDepthwiseConv2dNative() { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + Tensor input_data(DT_FLOAT, TensorShape({1, 1, 6, 2})); + test::FillValues( + &input_data, {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f, -1.0f, -4.0f, -2.0f, + -5.0f, -3.0f, -6.0f}); + Output input_op = + Const(root.WithOpName("input_op"), Input::Initializer(input_data)); + + Tensor weights_data(DT_FLOAT, TensorShape({1, 2, 2, 2})); + test::FillValues(&weights_data, + {1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.2f, 0.3f, 0.4f}); + Output weights_op = + Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); + + Output conv_op = DepthwiseConv2dNative(root.WithOpName("conv_op"), + input_op, weights_op, {1, 1, 1, 1}, "VALID"); + + Tensor mean_data(DT_FLOAT, TensorShape({4})); + test::FillValues(&mean_data, {10.0f, 20.0f, 30.0f, 40.0f}); + Output mean_op = + Const(root.WithOpName("mean_op"), Input::Initializer(mean_data)); + + 
Tensor variance_data(DT_FLOAT, TensorShape({4})); + test::FillValues(&variance_data, {0.25f, 0.5f, 0.75f, 1.0f}); + Output variance_op = Const(root.WithOpName("variance_op"), + Input::Initializer(variance_data)); + + Tensor beta_data(DT_FLOAT, TensorShape({4})); + test::FillValues(&beta_data, {0.1f, 0.6f, 1.1f, 1.6f}); + Output beta_op = + Const(root.WithOpName("beta_op"), Input::Initializer(beta_data)); + + Tensor gamma_data(DT_FLOAT, TensorShape({4})); + test::FillValues(&gamma_data, {1.0f, 2.0f, 3.0f, 4.0f}); + Output gamma_op = + Const(root.WithOpName("gamma_op"), Input::Initializer(gamma_data)); + + GraphDef original_graph_def; + TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); + + NodeDef batch_norm_node; + batch_norm_node.set_op("FusedBatchNorm"); + batch_norm_node.set_name("output"); + AddNodeInput("conv_op", &batch_norm_node); + AddNodeInput("gamma_op", &batch_norm_node); + AddNodeInput("beta_op", &batch_norm_node); + AddNodeInput("mean_op", &batch_norm_node); + AddNodeInput("variance_op", &batch_norm_node); + SetNodeAttr("T", DT_FLOAT, &batch_norm_node); + SetNodeAttr("epsilon", 0.00001f, &batch_norm_node); + SetNodeAttr("is_training", false, &batch_norm_node); + *(original_graph_def.mutable_node()->Add()) = batch_norm_node; + + std::unique_ptr original_session(NewSession(SessionOptions())); + TF_ASSERT_OK(original_session->Create(original_graph_def)); + std::vector original_outputs; + TF_ASSERT_OK(original_session->Run({}, {"output"}, {}, &original_outputs)); + + GraphDef fused_graph_def; + TF_ASSERT_OK(FoldOldBatchNorms(original_graph_def, {{}, {"output"}}, + &fused_graph_def)); + + std::unique_ptr fused_session(NewSession(SessionOptions())); + TF_ASSERT_OK(fused_session->Create(fused_graph_def)); + std::vector fused_outputs; + TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs)); + + test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 2e-5); + + for (const NodeDef& node : fused_graph_def.node()) { + 
EXPECT_NE("FusedBatchNorm", node.op()); + } + } + void TestFoldFusedBatchNormsWithConcat(const bool split) { auto root = tensorflow::Scope::NewRootScope(); using namespace ::tensorflow::ops; // NOLINT(build/namespaces) @@ -410,5 +566,13 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithBatchToSpace) { TestFoldFusedBatchNormsWithBatchToSpace(); } +TEST_F(FoldOldBatchNormsTest, TestFoldOldBatchNormsAfterDepthwiseConv2dNative) { + TestFoldOldBatchNormsAfterDepthwiseConv2dNative(); +} + +TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsAfterDepthwiseConv2dNative) { + TestFoldFusedBatchNormsAfterDepthwiseConv2dNative(); +} + } // namespace graph_transforms } // namespace tensorflow -- GitLab From c425f34ad4ef83055bb6ca5a9542d7320558c598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=9C=E5=B1=85?= Date: Thu, 6 Dec 2018 22:04:17 +0800 Subject: [PATCH 0014/1185] clang-format-3.6 regenerated files in this PR --- .../graph_transforms/fold_batch_norms.cc | 12 ++++----- .../graph_transforms/fold_batch_norms_test.cc | 8 +++--- .../graph_transforms/fold_old_batch_norms.cc | 21 +++++++-------- .../fold_old_batch_norms_test.cc | 27 ++++++++++--------- 4 files changed, 33 insertions(+), 35 deletions(-) diff --git a/tensorflow/tools/graph_transforms/fold_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_batch_norms.cc index 2d7bb65430..0a37620700 100644 --- a/tensorflow/tools/graph_transforms/fold_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_batch_norms.cc @@ -76,11 +76,10 @@ Status FoldBatchNorms(const GraphDef& input_graph_def, int64 weights_cols; if (conv_node.op() == "Conv2D") { weights_cols = weights.shape().dim_size(3); - } - else if (conv_node.op() == "DepthwiseConv2dNative") { - weights_cols = weights.shape().dim_size(2) * weights.shape().dim_size(3); - } - else { + } else if (conv_node.op() == "DepthwiseConv2dNative") { + weights_cols = + weights.shape().dim_size(2) * weights.shape().dim_size(3); + } else { weights_cols = 
weights.shape().dim_size(1); } if ((mul_values.shape().dims() != 1) || @@ -96,7 +95,8 @@ Status FoldBatchNorms(const GraphDef& input_graph_def, auto scaled_weights_vector = scaled_weights.flat(); for (int64 row = 0; row < weights_vector.dimension(0); ++row) { scaled_weights_vector(row) = - weights_vector(row) * mul_values.flat()(row % weights_cols); + weights_vector(row) * + mul_values.flat()(row % weights_cols); } // Construct the new nodes. diff --git a/tensorflow/tools/graph_transforms/fold_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_batch_norms_test.cc index 2b5326799e..885fbd59b7 100644 --- a/tensorflow/tools/graph_transforms/fold_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_batch_norms_test.cc @@ -104,10 +104,10 @@ class FoldBatchNormsTest : public ::testing::Test { Output weights_op = Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); - Output conv_op = DepthwiseConv2dNative(root.WithOpName("conv_op"), input_op, weights_op, - {1, 1, 1, 1}, "VALID"); + Output conv_op = DepthwiseConv2dNative(root.WithOpName("conv_op"), input_op, + weights_op, {1, 1, 1, 1}, "VALID"); - Tensor mul_values_data(DT_FLOAT, TensorShape({4})); + Tensor mul_values_data(DT_FLOAT, TensorShape({4})); test::FillValues(&mul_values_data, {2.0f, 3.0f, 4.0f, 5.0f}); Output mul_values_op = Const(root.WithOpName("mul_values"), Input::Initializer(mul_values_data)); @@ -136,7 +136,7 @@ class FoldBatchNormsTest : public ::testing::Test { for (const NodeDef& node : fused_graph_def.node()) { EXPECT_NE("Mul", node.op()); } - } + } void TestFoldBatchNormsConv2DShared() { auto root = tensorflow::Scope::NewRootScope(); diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index 413361b616..8c67bd23b5 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -32,9 +32,9 @@ Status 
ErrorIfNotVector(const Tensor& input, const string& input_name, int expected_width) { if ((input.shape().dims() != 1) || (input.shape().dim_size(0) != expected_width)) { - return errors::InvalidArgument( - input_name, - " input to batch norm has bad shape: ", input.shape().DebugString()); + return errors::InvalidArgument(input_name, + " input to batch norm has bad shape: ", + input.shape().DebugString()); } return Status::OK(); } @@ -119,11 +119,9 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, int64 weights_cols; if (conv_node.op() == "Conv2D") { weights_cols = weights.shape().dim_size(3); - } - else if (conv_node.op() == "DepthwiseConv2dNative") { + } else if (conv_node.op() == "DepthwiseConv2dNative") { weights_cols = weights.shape().dim_size(2) * weights.shape().dim_size(3); - } - else { + } else { weights_cols = weights.shape().dim_size(1); } CHECK_EQ(weights_cols, scale_values.size()); @@ -134,7 +132,7 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, auto scaled_weights_vector = scaled_weights.flat(); for (int64 row = 0; row < weights_vector.dimension(0); ++row) { scaled_weights_vector(row) = - weights_vector(row) * scale_values[row % weights_cols]; + weights_vector(row) * scale_values[row % weights_cols]; } // Figure out the remaining bias to add on. Tensor bias_offset(DT_FLOAT, {weights_cols}); @@ -193,7 +191,7 @@ Status FuseBatchNormWithConv(const NodeMatch& match, } Status FuseBatchNormWithBatchToSpace(const NodeMatch& match, - std::vector* new_nodes) { + std::vector* new_nodes) { // Calculate the scale and offset values to apply. 
std::vector scale_values; std::vector offset_values; @@ -208,9 +206,8 @@ Status FuseBatchNormWithBatchToSpace(const NodeMatch& match, const NodeDef& conv_node = conv_node_match.node; string biasadd_name = conv_node.name() + "/biasadd"; - TF_RETURN_IF_ERROR( - FuseScaleOffsetToConvWeights(scale_values, offset_values, conv_node_match, - biasadd_name , new_nodes)); + TF_RETURN_IF_ERROR(FuseScaleOffsetToConvWeights( + scale_values, offset_values, conv_node_match, biasadd_name, new_nodes)); NodeDef new_batch_to_space_node = batch_to_space_node; // reuse batch_norm node name diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index 45637cf9d1..925f37745c 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -138,8 +138,8 @@ class FoldOldBatchNormsTest : public ::testing::Test { Output weights_op = Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); - Output conv_op = DepthwiseConv2dNative(root.WithOpName("conv_op"), - input_op, weights_op, {1, 1, 1, 1}, "VALID"); + Output conv_op = DepthwiseConv2dNative(root.WithOpName("conv_op"), input_op, + weights_op, {1, 1, 1, 1}, "VALID"); Tensor mean_data(DT_FLOAT, TensorShape({4})); test::FillValues(&mean_data, {10.0f, 20.0f, 30.0f, 40.0f}); @@ -164,7 +164,6 @@ class FoldOldBatchNormsTest : public ::testing::Test { GraphDef original_graph_def; TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); - NodeDef batch_norm_node; batch_norm_node.set_op("BatchNormWithGlobalNormalization"); batch_norm_node.set_name("output"); @@ -198,7 +197,7 @@ class FoldOldBatchNormsTest : public ::testing::Test { for (const NodeDef& node : fused_graph_def.node()) { EXPECT_NE("BatchNormWithGlobalNormalization", node.op()); } - } + } void TestFoldFusedBatchNorms() { auto root = tensorflow::Scope::NewRootScope(); @@ -294,8 +293,8 @@ class 
FoldOldBatchNormsTest : public ::testing::Test { Output weights_op = Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); - Output conv_op = DepthwiseConv2dNative(root.WithOpName("conv_op"), - input_op, weights_op, {1, 1, 1, 1}, "VALID"); + Output conv_op = DepthwiseConv2dNative(root.WithOpName("conv_op"), input_op, + weights_op, {1, 1, 1, 1}, "VALID"); Tensor mean_data(DT_FLOAT, TensorShape({4})); test::FillValues(&mean_data, {10.0f, 20.0f, 30.0f, 40.0f}); @@ -477,16 +476,17 @@ void TestFoldFusedBatchNormsWithBatchToSpace() { Tensor block_shape_data(DT_INT32, TensorShape({2})); test::FillValues(&block_shape_data, {1, 2}); - Output block_shape_op = - Const(root.WithOpName("block_shape_op"), Input::Initializer(block_shape_data)); + Output block_shape_op = Const(root.WithOpName("block_shape_op"), + Input::Initializer(block_shape_data)); Tensor crops_data(DT_INT32, TensorShape({2, 2})); test::FillValues(&crops_data, {0, 0, 0, 1}); Output crops_op = Const(root.WithOpName("crops_op"), Input::Initializer(crops_data)); - Output batch_to_space_op = BatchToSpaceND(root.WithOpName("batch_to_space_op"), - conv_op, block_shape_op, crops_data); + Output batch_to_space_op = + BatchToSpaceND(root.WithOpName("batch_to_space_op"), conv_op, + block_shape_op, crops_data); Tensor mean_data(DT_FLOAT, TensorShape({2})); test::FillValues(&mean_data, {10.0f, 20.0f}); @@ -495,8 +495,8 @@ void TestFoldFusedBatchNormsWithBatchToSpace() { Tensor variance_data(DT_FLOAT, TensorShape({2})); test::FillValues(&variance_data, {0.25f, 0.5f}); - Output variance_op = Const(root.WithOpName("variance_op"), - Input::Initializer(variance_data)); + Output variance_op = + Const(root.WithOpName("variance_op"), Input::Initializer(variance_data)); Tensor beta_data(DT_FLOAT, TensorShape({2})); test::FillValues(&beta_data, {0.1f, 0.6f}); @@ -570,7 +570,8 @@ TEST_F(FoldOldBatchNormsTest, TestFoldOldBatchNormsAfterDepthwiseConv2dNative) { TestFoldOldBatchNormsAfterDepthwiseConv2dNative(); } 
-TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsAfterDepthwiseConv2dNative) { +TEST_F(FoldOldBatchNormsTest, + TestFoldFusedBatchNormsAfterDepthwiseConv2dNative) { TestFoldFusedBatchNormsAfterDepthwiseConv2dNative(); } -- GitLab From 46288142246f2d5130e7c3ba5787f616d53e53a9 Mon Sep 17 00:00:00 2001 From: Joe Quadrino Date: Fri, 30 Nov 2018 19:42:36 -0500 Subject: [PATCH 0015/1185] Configurable AWS logging for S3 filesystem AWS logging for S3 is too verbose and should be configurable. export AWS_LOG_LEVEL=FATAL Using TF_CPP_MIN_LOG_LEVEL for the AWS log handler can lead to a high rate of aws logging which drowns out less frequent logs Related commits 7bb0592, f67a7a4 Issue #21898 --- tensorflow/core/platform/s3/aws_logging.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/platform/s3/aws_logging.cc b/tensorflow/core/platform/s3/aws_logging.cc index 44317f1a3e..8eeb2c9492 100644 --- a/tensorflow/core/platform/s3/aws_logging.cc +++ b/tensorflow/core/platform/s3/aws_logging.cc @@ -74,7 +74,9 @@ static const char* kAWSLoggingTag = "AWSLogging"; Aws::Utils::Logging::LogLevel ParseLogLevelFromEnv() { Aws::Utils::Logging::LogLevel log_level = Aws::Utils::Logging::LogLevel::Info; - const int64_t level = tensorflow::internal::MinLogLevelFromEnv(); + const int64_t level = + getenv("AWS_LOG_LEVEL") ? tensorflow::internal::LogLevelStrToInt(getenv("AWS_LOG_LEVEL")) + : tensorflow::internal::MinLogLevelFromEnv(); switch (level) { case INFO: -- GitLab From 466711c40783a907cf6867cec5c13c16ed0bf257 Mon Sep 17 00:00:00 2001 From: Taylor Jakobson Date: Fri, 30 Nov 2018 13:50:27 -0600 Subject: [PATCH 0016/1185] Add support for ppc64le_dockerfiles Add support for ppc64le dockerfiles with newest assembler changes. 
--- tensorflow/tools/dockerfiles/assembler.py | 8 + .../dockerfiles/devel-cpu-jupyter.Dockerfile | 9 +- .../dockerfiles/devel-cpu.Dockerfile | 9 +- .../dockerfiles/devel-gpu-jupyter.Dockerfile | 53 +++--- .../dockerfiles/devel-gpu.Dockerfile | 51 +++--- .../dockerfiles/gpu-jupyter.Dockerfile | 31 ++-- .../dockerfiles/dockerfiles/gpu.Dockerfile | 31 ++-- .../ppc64le/cpu-ppc64le-jupyter.Dockerfile | 92 +++++++++++ .../ppc64le/cpu-ppc64le.Dockerfile | 75 +++++++++ .../devel-cpu-ppc64le-jupyter.Dockerfile | 125 +++++++++++++++ .../ppc64le/devel-cpu-ppc64le.Dockerfile | 108 +++++++++++++ .../devel-gpu-ppc64le-jupyter.Dockerfile | 151 ++++++++++++++++++ .../ppc64le/devel-gpu-ppc64le.Dockerfile | 134 ++++++++++++++++ .../ppc64le/gpu-ppc64le-jupyter.Dockerfile | 125 +++++++++++++++ .../ppc64le/gpu-ppc64le.Dockerfile | 108 +++++++++++++ .../tensorflow-ppc64le.partial.Dockerfile | 28 ++++ .../ubuntu/bazelbuild.partial.Dockerfile | 33 ++++ .../ubuntu/devel-cpu.partial.Dockerfile | 9 +- .../ubuntu/devel-nvidia.partial.Dockerfile | 51 +++--- .../partials/ubuntu/nvidia.partial.Dockerfile | 31 ++-- tensorflow/tools/dockerfiles/spec.yml | 71 ++++++++ tensorflow/tools/dockerfiles/tools.Dockerfile | 2 +- 22 files changed, 1216 insertions(+), 119 deletions(-) create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile create mode 100644 
tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/tensorflow-ppc64le.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py index 09537b7314..83b72cb5bb 100644 --- a/tensorflow/tools/dockerfiles/assembler.py +++ b/tensorflow/tools/dockerfiles/assembler.py @@ -34,6 +34,7 @@ import errno import itertools import multiprocessing import os +import platform import re import shutil import sys @@ -552,6 +553,13 @@ def main(argv): if not FLAGS.build_images: continue + # Only build images for host architecture + proc_arch = platform.processor() + is_x86 = proc_arch.startswith('x86') + if (is_x86 and any([arch in tag for arch in ['ppc64le']]) or + not is_x86 and proc_arch not in tag): + continue + # Generate a temporary Dockerfile to use to build, since docker-py # needs a filepath relative to the build context (i.e. 
the current # directory) diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile index c1f6dafbe0..4657f8b4c7 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile @@ -30,7 +30,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev \ - libpng12-dev \ libzmq3-dev \ pkg-config \ rsync \ @@ -43,12 +42,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* - + ENV CI_BUILD_PYTHON python -# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1 +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 ARG CHECKOUT_TF_SRC=0 -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile index b4dfc8b099..ce69ee0650 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile @@ -30,7 +30,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev \ - libpng12-dev \ libzmq3-dev \ pkg-config \ rsync \ @@ -43,12 +42,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* - + ENV 
CI_BUILD_PYTHON python -# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1 +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 ARG CHECKOUT_TF_SRC=0 -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile index 6d76c06332..e41fc18e42 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile @@ -21,23 +21,28 @@ ARG UBUNTU_VERSION=16.04 -FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base - +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 +ARG LIB_DIR_PREFIX=x84_64 + +# Needed for string substitution +SHELL ["/bin/bash", "-c"] RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ - cuda-command-line-tools-10-0 \ - cuda-cublas-dev-10-0 \ - cuda-cudart-dev-10-0 \ - cuda-cufft-dev-10-0 \ - cuda-curand-dev-10-0 \ - cuda-cusolver-dev-10-0 \ - cuda-cusparse-dev-10-0 \ - libcudnn7=7.4.1.5-1+cuda10.0 \ - libcudnn7-dev=7.4.1.5-1+cuda10.0 \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-dev-${CUDA/./-} \ + cuda-cudart-dev-${CUDA/./-} \ + cuda-cufft-dev-${CUDA/./-} \ + cuda-curand-dev-${CUDA/./-} \ + cuda-cusolver-dev-${CUDA/./-} \ + cuda-cusparse-dev-${CUDA/./-} \ + libcudnn7=${CUDNN}+cuda${CUDA} \ + libcudnn7-dev=${CUDNN}+cuda${CUDA} \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev 
\ - libpng12-dev \ libzmq3-dev \ pkg-config \ rsync \ @@ -48,14 +53,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ git \ && \ - find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \ - rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a + find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \ + rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a -RUN apt-get update && \ - apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \ +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ && apt-get update \ - && apt-get install -y --no-install-recommends libnvinfer-dev=5.0.2-1+cuda10.0 \ - && rm -rf /var/lib/apt/lists/* + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/*) # Configure the build for our CUDA configuration. 
ENV CI_BUILD_PYTHON python @@ -63,12 +69,13 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 -ENV TF_CUDA_VERSION=10.0 -ENV TF_CUDNN_VERSION=7 - -# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1 +ENV TF_CUDA_VERSION=${CUDA} +ENV TF_CUDNN_VERSION=${CUDNN%%.*} +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 ARG CHECKOUT_TF_SRC=0 -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile index 160abc8763..7ae5010079 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile @@ -21,23 +21,28 @@ ARG UBUNTU_VERSION=16.04 -FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 +ARG LIB_DIR_PREFIX=x84_64 +# Needed for string substitution +SHELL ["/bin/bash", "-c"] RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ - cuda-command-line-tools-10-0 \ - cuda-cublas-dev-10-0 \ - cuda-cudart-dev-10-0 \ - cuda-cufft-dev-10-0 \ - cuda-curand-dev-10-0 \ - cuda-cusolver-dev-10-0 \ - cuda-cusparse-dev-10-0 \ - libcudnn7=7.4.1.5-1+cuda10.0 \ - libcudnn7-dev=7.4.1.5-1+cuda10.0 \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-dev-${CUDA/./-} \ + 
cuda-cudart-dev-${CUDA/./-} \ + cuda-cufft-dev-${CUDA/./-} \ + cuda-curand-dev-${CUDA/./-} \ + cuda-cusolver-dev-${CUDA/./-} \ + cuda-cusparse-dev-${CUDA/./-} \ + libcudnn7=${CUDNN}+cuda${CUDA} \ + libcudnn7-dev=${CUDNN}+cuda${CUDA} \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev \ - libpng12-dev \ libzmq3-dev \ pkg-config \ rsync \ @@ -48,14 +53,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ git \ && \ - find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \ - rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a + find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \ + rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a -RUN apt-get update && \ - apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \ +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ && apt-get update \ - && apt-get install -y --no-install-recommends libnvinfer-dev=5.0.2-1+cuda10.0 \ - && rm -rf /var/lib/apt/lists/* + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/*) # Configure the build for our CUDA configuration. 
ENV CI_BUILD_PYTHON python @@ -63,12 +69,13 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 -ENV TF_CUDA_VERSION=10.0 -ENV TF_CUDNN_VERSION=7 - -# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1 +ENV TF_CUDA_VERSION=${CUDA} +ENV TF_CUDNN_VERSION=${CUDNN%%.*} +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 ARG CHECKOUT_TF_SRC=0 -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile index 46252c5413..12eb5afa8a 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile @@ -21,32 +21,37 @@ ARG UBUNTU_VERSION=16.04 -FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 +# Needed for string substitution +SHELL ["/bin/bash", "-c"] # Pick up some TF dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ - cuda-command-line-tools-10-0 \ - cuda-cublas-10-0 \ - cuda-cufft-10-0 \ - cuda-curand-10-0 \ - cuda-cusolver-10-0 \ - cuda-cusparse-10-0 \ - libcudnn7=7.4.1.5-1+cuda10.0 \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-${CUDA/./-} \ + cuda-cufft-${CUDA/./-} \ + cuda-curand-${CUDA/./-} \ + cuda-cusolver-${CUDA/./-} \ + 
cuda-cusparse-${CUDA/./-} \ + curl \ + libcudnn7=${CUDNN}+cuda${CUDA} \ libfreetype6-dev \ libhdf5-serial-dev \ - libpng12-dev \ libzmq3-dev \ pkg-config \ software-properties-common \ unzip -RUN apt-get update && \ - apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \ +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ && apt-get update \ - && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda10.0 \ + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/*) # For CUDA profiling, TensorFlow requires CUPTI. ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile index 80e427f824..00664b6b73 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile @@ -21,32 +21,37 @@ ARG UBUNTU_VERSION=16.04 -FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 +# Needed for string substitution +SHELL ["/bin/bash", "-c"] # Pick up some TF dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ - cuda-command-line-tools-10-0 \ - cuda-cublas-10-0 \ - cuda-cufft-10-0 \ - cuda-curand-10-0 \ - cuda-cusolver-10-0 \ - cuda-cusparse-10-0 \ - libcudnn7=7.4.1.5-1+cuda10.0 \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-${CUDA/./-} \ + cuda-cufft-${CUDA/./-} \ + cuda-curand-${CUDA/./-} \ + cuda-cusolver-${CUDA/./-} \ + cuda-cusparse-${CUDA/./-} \ + curl \ + libcudnn7=${CUDNN}+cuda${CUDA} \ libfreetype6-dev \ libhdf5-serial-dev \ - libpng12-dev \ libzmq3-dev \ pkg-config \ 
software-properties-common \ unzip -RUN apt-get update && \ - apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \ +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ && apt-get update \ - && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda10.0 \ + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/*) # For CUDA profiling, TensorFlow requires CUPTI. ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile new file mode 100644 index 0000000000..beb3292a9d --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile @@ -0,0 +1,92 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=16.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +ARG USE_PYTHON_3_NOT_2 +ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} +ARG PYTHON=python${_PY_SUFFIX} +ARG PIP=pip${_PY_SUFFIX} + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y \ + ${PYTHON} \ + ${PYTHON}-pip + +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +ARG TF_PACKAGE=tensorflow +RUN apt-get update && apt-get install -y wget libhdf5-dev +RUN ${PIP} install --global-option=build_ext \ + --global-option=-I/usr/include/hdf5/serial/ \ + --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \ + h5py + +# CACHE_STOP is used to rerun future commands, otherwise downloading the .whl will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tensorflow ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \ + fi; \ + MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \ + MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \ + PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \ + wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \ + ${PIP} install ${PACKAGE} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + 
+RUN ${PIP} install jupyter matplotlib + +RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +RUN apt-get install -y --no-install-recommends wget +WORKDIR /tf/tensorflow-tutorials +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb +COPY readme-for-jupyter.md README.md +RUN apt-get autoremove -y && apt-get remove -y wget +WORKDIR /tf +EXPOSE 8888 + +RUN ${PYTHON} -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le.Dockerfile new file mode 100644 index 0000000000..083d61bf9a --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le.Dockerfile @@ -0,0 +1,75 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=16.04 + +FROM ubuntu:${UBUNTU_VERSION} as base + +ARG USE_PYTHON_3_NOT_2 +ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} +ARG PYTHON=python${_PY_SUFFIX} +ARG PIP=pip${_PY_SUFFIX} + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y \ + ${PYTHON} \ + ${PYTHON}-pip + +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +ARG TF_PACKAGE=tensorflow +RUN apt-get update && apt-get install -y wget libhdf5-dev +RUN ${PIP} install --global-option=build_ext \ + --global-option=-I/usr/include/hdf5/serial/ \ + --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \ + h5py + +# CACHE_STOP is used to rerun future commands, otherwise downloading the .whl will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tensorflow ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \ + fi; \ + MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \ + MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \ + PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \ + wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \ + ${PIP} install ${PACKAGE} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff 
--git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile new file mode 100644 index 0000000000..1f32849735 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile @@ -0,0 +1,125 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=16.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true + +ARG USE_PYTHON_3_NOT_2 +ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} +ARG PYTHON=python${_PY_SUFFIX} +ARG PIP=pip${_PY_SUFFIX} + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y \ + ${PYTHON} \ + ${PYTHON}-pip + +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + git \ + openjdk-8-jdk \ + ${PYTHON}-dev \ + swig + +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + + # Build and install bazel +ENV BAZEL_VERSION 0.15.0 +WORKDIR / +RUN mkdir /bazel && \ + cd /bazel && \ + curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \ + unzip bazel-$BAZEL_VERSION-dist.zip && \ + bash ./compile.sh && \ + cp output/bazel /usr/local/bin/ && \ + rm 
-rf /bazel && \ + cd - + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN ${PIP} install jupyter matplotlib + +RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +RUN apt-get install -y --no-install-recommends wget +WORKDIR /tf/tensorflow-tutorials +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb +COPY readme-for-jupyter.md README.md +RUN apt-get autoremove -y && apt-get remove -y wget +WORKDIR /tf +EXPOSE 8888 + +RUN ${PYTHON} -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le.Dockerfile new file mode 100644 index 0000000000..cda51c371d --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le.Dockerfile @@ -0,0 +1,108 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. 
+# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. + +ARG UBUNTU_VERSION=16.04 + +FROM ubuntu:${UBUNTU_VERSION} AS base + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true + +ARG USE_PYTHON_3_NOT_2 +ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} +ARG PYTHON=python${_PY_SUFFIX} +ARG PIP=pip${_PY_SUFFIX} + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y \ + ${PYTHON} \ + ${PYTHON}-pip + +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + git \ + openjdk-8-jdk \ + ${PYTHON}-dev \ + swig + +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + + # Build and install bazel +ENV BAZEL_VERSION 0.15.0 +WORKDIR / +RUN mkdir /bazel && \ + cd /bazel && \ + curl -fSsL -O 
https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \ + unzip bazel-$BAZEL_VERSION-dist.zip && \ + bash ./compile.sh && \ + cp output/bazel /usr/local/bin/ && \ + rm -rf /bazel && \ + cd - + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile new file mode 100644 index 0000000000..d8ee19f66e --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile @@ -0,0 +1,151 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=16.04 + +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 +ARG LIB_DIR_PREFIX=x84_64 + +# Needed for string substitution +SHELL ["/bin/bash", "-c"] +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-dev-${CUDA/./-} \ + cuda-cudart-dev-${CUDA/./-} \ + cuda-cufft-dev-${CUDA/./-} \ + cuda-curand-dev-${CUDA/./-} \ + cuda-cusolver-dev-${CUDA/./-} \ + cuda-cusparse-dev-${CUDA/./-} \ + libcudnn7=${CUDNN}+cuda${CUDA} \ + libcudnn7-dev=${CUDNN}+cuda${CUDA} \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + unzip \ + zip \ + zlib1g-dev \ + wget \ + git \ + && \ + find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \ + rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a + +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ + && apt-get update \ + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/*) + +# Configure the build for our CUDA configuration. 
+ENV CI_BUILD_PYTHON python +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV TF_NEED_CUDA 1 +ENV TF_NEED_TENSORRT 1 +ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 +ENV TF_CUDA_VERSION=${CUDA} +ENV TF_CUDNN_VERSION=${CUDNN%%.*} +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true + +ARG USE_PYTHON_3_NOT_2 +ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} +ARG PYTHON=python${_PY_SUFFIX} +ARG PIP=pip${_PY_SUFFIX} + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y \ + ${PYTHON} \ + ${PYTHON}-pip + +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + git \ + openjdk-8-jdk \ + ${PYTHON}-dev \ + swig + +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + + # Build and install bazel +ENV BAZEL_VERSION 0.15.0 +WORKDIR / +RUN mkdir /bazel && \ + cd /bazel && \ + curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \ + unzip bazel-$BAZEL_VERSION-dist.zip && \ + bash ./compile.sh && \ + cp output/bazel /usr/local/bin/ && \ + rm -rf /bazel && \ + cd - + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc + +RUN ${PIP} install jupyter matplotlib + +RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ +RUN mkdir /.local && 
chmod a+rwx /.local +RUN apt-get install -y --no-install-recommends wget +WORKDIR /tf/tensorflow-tutorials +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb +COPY readme-for-jupyter.md README.md +RUN apt-get autoremove -y && apt-get remove -y wget +WORKDIR /tf +EXPOSE 8888 + +RUN ${PYTHON} -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile new file mode 100644 index 0000000000..966070634b --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le.Dockerfile @@ -0,0 +1,134 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=16.04 + +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 +ARG LIB_DIR_PREFIX=x84_64 + +# Needed for string substitution +SHELL ["/bin/bash", "-c"] +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-dev-${CUDA/./-} \ + cuda-cudart-dev-${CUDA/./-} \ + cuda-cufft-dev-${CUDA/./-} \ + cuda-curand-dev-${CUDA/./-} \ + cuda-cusolver-dev-${CUDA/./-} \ + cuda-cusparse-dev-${CUDA/./-} \ + libcudnn7=${CUDNN}+cuda${CUDA} \ + libcudnn7-dev=${CUDNN}+cuda${CUDA} \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + unzip \ + zip \ + zlib1g-dev \ + wget \ + git \ + && \ + find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \ + rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a + +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ + && apt-get update \ + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/*) + +# Configure the build for our CUDA configuration. 
+ENV CI_BUILD_PYTHON python +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV TF_NEED_CUDA 1 +ENV TF_NEED_TENSORRT 1 +ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 +ENV TF_CUDA_VERSION=${CUDA} +ENV TF_CUDNN_VERSION=${CUDNN%%.*} +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 +ARG CHECKOUT_TF_SRC=0 +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true + +ARG USE_PYTHON_3_NOT_2 +ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} +ARG PYTHON=python${_PY_SUFFIX} +ARG PIP=pip${_PY_SUFFIX} + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y \ + ${PYTHON} \ + ${PYTHON}-pip + +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + git \ + openjdk-8-jdk \ + ${PYTHON}-dev \ + swig + +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + + # Build and install bazel +ENV BAZEL_VERSION 0.15.0 +WORKDIR / +RUN mkdir /bazel && \ + cd /bazel && \ + curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \ + unzip bazel-$BAZEL_VERSION-dist.zip && \ + bash ./compile.sh && \ + cp output/bazel /usr/local/bin/ && \ + rm -rf /bazel && \ + cd - + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile 
b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile new file mode 100644 index 0000000000..449a8d8aa8 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile @@ -0,0 +1,125 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=16.04 + +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 + +# Needed for string substitution +SHELL ["/bin/bash", "-c"] +# Pick up some TF dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-${CUDA/./-} \ + cuda-cufft-${CUDA/./-} \ + cuda-curand-${CUDA/./-} \ + cuda-cusolver-${CUDA/./-} \ + cuda-cusparse-${CUDA/./-} \ + curl \ + libcudnn7=${CUDNN}+cuda${CUDA} \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + software-properties-common \ + unzip + +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ + && apt-get update \ + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/*) + +# For CUDA profiling, TensorFlow requires CUPTI. 
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH + +ARG USE_PYTHON_3_NOT_2 +ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} +ARG PYTHON=python${_PY_SUFFIX} +ARG PIP=pip${_PY_SUFFIX} + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y \ + ${PYTHON} \ + ${PYTHON}-pip + +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +ARG TF_PACKAGE=tensorflow +RUN apt-get update && apt-get install -y wget libhdf5-dev +RUN ${PIP} install --global-option=build_ext \ + --global-option=-I/usr/include/hdf5/serial/ \ + --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \ + h5py + +# CACHE_STOP is used to rerun future commands, otherwise downloading the .whl will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tensorflow ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \ + fi; \ + MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \ + MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \ + PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \ + wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \ + ${PIP} install ${PACKAGE} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc 
+ +RUN ${PIP} install jupyter matplotlib + +RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ +RUN mkdir /.local && chmod a+rwx /.local +RUN apt-get install -y --no-install-recommends wget +WORKDIR /tf/tensorflow-tutorials +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb +COPY readme-for-jupyter.md README.md +RUN apt-get autoremove -y && apt-get remove -y wget +WORKDIR /tf +EXPOSE 8888 + +RUN ${PYTHON} -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile new file mode 100644 index 0000000000..f01a47f1c0 --- /dev/null +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le.Dockerfile @@ -0,0 +1,108 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +# +# THIS IS A GENERATED DOCKERFILE. +# +# This file was assembled from multiple pieces, whose use is documented +# throughout. Please refer to the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=16.04 + +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 + +# Needed for string substitution +SHELL ["/bin/bash", "-c"] +# Pick up some TF dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-${CUDA/./-} \ + cuda-cufft-${CUDA/./-} \ + cuda-curand-${CUDA/./-} \ + cuda-cusolver-${CUDA/./-} \ + cuda-cusparse-${CUDA/./-} \ + curl \ + libcudnn7=${CUDNN}+cuda${CUDA} \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libzmq3-dev \ + pkg-config \ + software-properties-common \ + unzip + +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ + && apt-get update \ + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/*) + +# For CUDA profiling, TensorFlow requires CUPTI. 
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH + +ARG USE_PYTHON_3_NOT_2 +ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} +ARG PYTHON=python${_PY_SUFFIX} +ARG PIP=pip${_PY_SUFFIX} + +# See http://bugs.python.org/issue19846 +ENV LANG C.UTF-8 + +RUN apt-get update && apt-get install -y \ + ${PYTHON} \ + ${PYTHON}-pip + +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +ARG TF_PACKAGE=tensorflow +RUN apt-get update && apt-get install -y wget libhdf5-dev +RUN ${PIP} install --global-option=build_ext \ + --global-option=-I/usr/include/hdf5/serial/ \ + --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \ + h5py + +# CACHE_STOP is used to rerun future commands, otherwise downloading the .whl will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tensorflow ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \ + fi; \ + MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \ + MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \ + PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \ + wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \ + ${PIP} install ${PACKAGE} + +COPY bashrc /etc/bash.bashrc +RUN chmod a+rwx /etc/bash.bashrc 
diff --git a/tensorflow/tools/dockerfiles/partials/tensorflow-ppc64le.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/tensorflow-ppc64le.partial.Dockerfile new file mode 100644 index 0000000000..1e79574a34 --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/tensorflow-ppc64le.partial.Dockerfile @@ -0,0 +1,28 @@ +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +ARG TF_PACKAGE=tensorflow +RUN apt-get update && apt-get install -y wget libhdf5-dev +RUN ${PIP} install --global-option=build_ext \ + --global-option=-I/usr/include/hdf5/serial/ \ + --global-option=-L/usr/lib/powerpc64le-linux-gnu/hdf5/serial \ + h5py + +# CACHE_STOP is used to rerun future commands, otherwise downloading the .whl will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +RUN if [ ${TF_PACKAGE} = tensorflow-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly-gpu ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tensorflow ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/lastSuccessfulBuild/; \ + elif [ ${TF_PACKAGE} = tf-nightly ]; then \ + BASE=https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/lastSuccessfulBuild/; \ + fi; \ + MAJOR=`${PYTHON} -c 'import sys; print(sys.version_info[0])'`; \ + MINOR=`${PYTHON} -c 'import sys; print(sys.version_info[1])'`; \ + PACKAGE=$(wget -qO- ${BASE}"api/xml?xpath=//fileName&wrapper=artifacts" | grep -o "[^<>]*cp${MAJOR}${MINOR}[^<>]*.whl"); \ + wget ${BASE}"artifact/tensorflow_pkg/"${PACKAGE}; \ + ${PIP} install ${PACKAGE} diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile new file mode 100644 index 0000000000..0397ab5fa8 --- /dev/null +++ 
b/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile @@ -0,0 +1,33 @@ +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + git \ + openjdk-8-jdk \ + ${PYTHON}-dev \ + swig + +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + + # Build and install bazel +ENV BAZEL_VERSION 0.15.0 +WORKDIR / +RUN mkdir /bazel && \ + cd /bazel && \ + curl -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-dist.zip && \ + unzip bazel-$BAZEL_VERSION-dist.zip && \ + bash ./compile.sh && \ + cp output/bazel /usr/local/bin/ && \ + rm -rf /bazel && \ + cd - diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile index 0652ac4151..aaeda0b207 100644 --- a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile @@ -7,7 +7,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev \ - libpng12-dev \ libzmq3-dev \ pkg-config \ rsync \ @@ -20,9 +19,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* - + ENV CI_BUILD_PYTHON python -# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1 +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 ARG CHECKOUT_TF_SRC=0 -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src +RUN test 
"${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile index 2b4494ac59..8ce4a1879d 100644 --- a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile @@ -1,20 +1,25 @@ -FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 +ARG LIB_DIR_PREFIX=x84_64 +# Needed for string substitution +SHELL ["/bin/bash", "-c"] RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ - cuda-command-line-tools-10-0 \ - cuda-cublas-dev-10-0 \ - cuda-cudart-dev-10-0 \ - cuda-cufft-dev-10-0 \ - cuda-curand-dev-10-0 \ - cuda-cusolver-dev-10-0 \ - cuda-cusparse-dev-10-0 \ - libcudnn7=7.4.1.5-1+cuda10.0 \ - libcudnn7-dev=7.4.1.5-1+cuda10.0 \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-dev-${CUDA/./-} \ + cuda-cudart-dev-${CUDA/./-} \ + cuda-cufft-dev-${CUDA/./-} \ + cuda-curand-dev-${CUDA/./-} \ + cuda-cusolver-dev-${CUDA/./-} \ + cuda-cusparse-dev-${CUDA/./-} \ + libcudnn7=${CUDNN}+cuda${CUDA} \ + libcudnn7-dev=${CUDNN}+cuda${CUDA} \ libcurl3-dev \ libfreetype6-dev \ libhdf5-serial-dev \ - libpng12-dev \ libzmq3-dev \ pkg-config \ rsync \ @@ -25,14 +30,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ git \ && \ - find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \ - rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a + find /usr/local/cuda-${CUDA}/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \ + rm /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libcudnn_static_v7.a -RUN apt-get update && \ - 
apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \ +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ && apt-get update \ - && apt-get install -y --no-install-recommends libnvinfer-dev=5.0.2-1+cuda10.0 \ - && rm -rf /var/lib/apt/lists/* + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/*) # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python @@ -40,9 +46,10 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH ENV TF_NEED_CUDA 1 ENV TF_NEED_TENSORRT 1 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 -ENV TF_CUDA_VERSION=10.0 -ENV TF_CUDNN_VERSION=7 - -# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1 +ENV TF_CUDA_VERSION=${CUDA} +ENV TF_CUDNN_VERSION=${CUDNN%%.*} +# CACHE_STOP is used to rerun future commands, otherwise cloning tensorflow will be cached and will not pull the most recent version +ARG CACHE_STOP=1 +# Check out TensorFlow source code if --build_arg CHECKOUT_TF_SRC=1 ARG CHECKOUT_TF_SRC=0 -RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src +RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src || true diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile index a6393a3280..1d40ed5f98 100644 --- a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile @@ -1,29 +1,34 @@ -FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base +ARG ARCH= +ARG CUDA=10.0 +FROM nvidia/cuda${ARCH:+-$ARCH}:${CUDA}-base-ubuntu${UBUNTU_VERSION} as base +ARG CUDNN=7.4.1.5-1 +# Needed for string substitution +SHELL 
["/bin/bash", "-c"] # Pick up some TF dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ - cuda-command-line-tools-10-0 \ - cuda-cublas-10-0 \ - cuda-cufft-10-0 \ - cuda-curand-10-0 \ - cuda-cusolver-10-0 \ - cuda-cusparse-10-0 \ - libcudnn7=7.4.1.5-1+cuda10.0 \ + cuda-command-line-tools-${CUDA/./-} \ + cuda-cublas-${CUDA/./-} \ + cuda-cufft-${CUDA/./-} \ + cuda-curand-${CUDA/./-} \ + cuda-cusolver-${CUDA/./-} \ + cuda-cusparse-${CUDA/./-} \ + curl \ + libcudnn7=${CUDNN}+cuda${CUDA} \ libfreetype6-dev \ libhdf5-serial-dev \ - libpng12-dev \ libzmq3-dev \ pkg-config \ software-properties-common \ unzip -RUN apt-get update && \ - apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \ +RUN [ ${ARCH} = ppc64le ] || (apt-get update && \ + apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda${CUDA} \ && apt-get update \ - && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda10.0 \ + && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda${CUDA} \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/*) # For CUDA profiling, TensorFlow requires CUPTI. 
ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index 19d96e7a3d..d19b1d15fc 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -56,6 +56,13 @@ releases: - "{ubuntu}{jupyter}" - "{ubuntu-devel}{jupyter}" + ppc64le-dockerfiles: + is_dockerfiles: true + upload_images: false + tag_specs: + - "{ubuntu-ppc64le}{jupyter}" + - "{ubuntu-devel-ppc64le}{jupyter}" + slice_sets: py: @@ -122,6 +129,70 @@ slice_sets: args: - CHECKOUT_TF_SRC=1 + ubuntu-ppc64le: + - add_to_name: "-ppc64le" + dockerfile_exclusive_name: "cpu-ppc64le" + dockerfile_subdirectory: "ppc64le" + args: + - UBUNTU_VERSION=18.04 + partials: + - ubuntu/version + - ubuntu/cpu + - ubuntu/python + - tensorflow-ppc64le + - shell + - add_to_name: "-gpu-ppc64le" + dockerfile_exclusive_name: "gpu-ppc64le" + dockerfile_subdirectory: "ppc64le" + args: + - UBUNTU_VERSION=18.04 + - ARCH=ppc64le + - CUDA=10.0 + - TF_PACKAGE=tensorflow-gpu + partials: + - ubuntu/version + - ubuntu/nvidia + - ubuntu/python + - tensorflow-ppc64le + - shell + tests: + - import-gpu.sh + test_runtime: nvidia + + ubuntu-devel-ppc64le: + - add_to_name: "devel-ppc64le" + dockerfile_exclusive_name: "devel-cpu-ppc64le" + dockerfile_subdirectory: "ppc64le" + partials: + - ubuntu/version + - ubuntu/devel-cpu + - ubuntu/python + - ubuntu/bazelbuild + - shell + tests: + - build-cpu.sh + args: + - UBUNTU_VERSION=18.04 + - CHECKOUT_TF_SRC=1 + - add_to_name: "devel-gpu-ppc64le" + dockerfile_exclusive_name: "devel-gpu-ppc64le" + dockerfile_subdirectory: "ppc64le" + args: + - UBUNTU_VERSION=18.04 + - ARCH=ppc64le + - CUDA=10.0 + - LIB_DIR_PREFIX=powerpc64le + - CHECKOUT_TF_SRC=1 + partials: + - ubuntu/version + - ubuntu/devel-nvidia + - ubuntu/python + - ubuntu/bazelbuild + - shell + tests: + - build-gpu.sh + test_runtime: nvidia + nightly: - add_to_name: "nightly" partials: diff --git 
a/tensorflow/tools/dockerfiles/tools.Dockerfile b/tensorflow/tools/dockerfiles/tools.Dockerfile index e8929295a5..a96b2578cb 100644 --- a/tensorflow/tools/dockerfiles/tools.Dockerfile +++ b/tensorflow/tools/dockerfiles/tools.Dockerfile @@ -17,7 +17,7 @@ # # You can use this image to quickly develop changes to the Dockerfile assembler # or set of TF Docker partials. See README.md for usage instructions. -FROM debian:stretch +FROM ubuntu:16.04 LABEL maintainer="Austin Anderson " RUN apt-get update && apt-get install -y python3 python3-pip bash curl -- GitLab From b99c63abbd331c4d0e459cc3542e3d6a49764d23 Mon Sep 17 00:00:00 2001 From: nammbash Date: Fri, 18 Jan 2019 16:53:38 -0800 Subject: [PATCH 0017/1185] requantization_op_perchannel_support --- tensorflow/core/BUILD | 1 + .../api_def_QuantizedConv2DAndRelu.pbtxt | 4 + ..._QuantizedConv2DAndReluAndRequantize.pbtxt | 4 + ...api_def_QuantizedConv2DAndRequantize.pbtxt | 4 + .../api_def_QuantizedConv2DWithBias.pbtxt | 4 + ...i_def_QuantizedConv2DWithBiasAndRelu.pbtxt | 4 + ...edConv2DWithBiasAndReluAndRequantize.pbtxt | 4 + ...QuantizedConv2DWithBiasAndRequantize.pbtxt | 4 + ...ithBiasSignedSumAndReluAndRequantize.pbtxt | 4 + ...ef_QuantizedConv2DWithBiasSumAndRelu.pbtxt | 4 + ...onv2DWithBiasSumAndReluAndRequantize.pbtxt | 4 + ...pi_def_RequantizationRangePerChannel.pbtxt | 4 + .../api_def_RequantizePerChannel.pbtxt | 4 + tensorflow/core/kernels/BUILD | 65 +++- .../core/kernels/mkl_quantized_conv_ops.h | 37 ++- ...mkl_requantization_range_per_channel_op.cc | 110 +++++++ .../core/kernels/mkl_requantize_ops_test.cc | 297 ++++++++++++++++++ .../kernels/mkl_requantize_per_channel_op.cc | 171 ++++++++++ tensorflow/core/kernels/requantize.cc | 8 +- tensorflow/core/ops/math_ops.cc | 47 ++- 20 files changed, 775 insertions(+), 9 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndRelu.pbtxt create mode 100644 
tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndReluAndRequantize.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndRequantize.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBias.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndRelu.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndReluAndRequantize.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndRequantize.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSignedSumAndReluAndRequantize.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSumAndRelu.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSumAndReluAndRequantize.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt create mode 100644 tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc create mode 100644 tensorflow/core/kernels/mkl_requantize_ops_test.cc create mode 100644 tensorflow/core/kernels/mkl_requantize_per_channel_op.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 084db7a0fd..4be4f105e6 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1449,6 +1449,7 @@ cc_library( "//tensorflow/core/kernels:mkl_identity_op", "//tensorflow/core/kernels:mkl_input_conversion_op", "//tensorflow/core/kernels:mkl_lrn_op", + "//tensorflow/core/kernels:mkl_requantize_ops", "//tensorflow/core/kernels:mkl_pooling_ops", "//tensorflow/core/kernels:mkl_relu_op", "//tensorflow/core/kernels:mkl_reshape_op", diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndRelu.pbtxt new file mode 
100644 index 0000000000..17ff15378c --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndRelu.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DAndRelu" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndReluAndRequantize.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndReluAndRequantize.pbtxt new file mode 100644 index 0000000000..b3ab3eba2c --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndReluAndRequantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DAndReluAndRequantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndRequantize.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndRequantize.pbtxt new file mode 100644 index 0000000000..8b00c2b7f6 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DAndRequantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DAndRequantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBias.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBias.pbtxt new file mode 100644 index 0000000000..f309f648ca --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBias.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DWithBias" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndRelu.pbtxt new file mode 100644 index 0000000000..b6b73eaae3 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndRelu.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DWithBiasAndRelu" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndReluAndRequantize.pbtxt 
b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndReluAndRequantize.pbtxt new file mode 100644 index 0000000000..101f72708a --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndReluAndRequantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DWithBiasAndReluAndRequantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndRequantize.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndRequantize.pbtxt new file mode 100644 index 0000000000..697e268415 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasAndRequantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DWithBiasAndRequantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSignedSumAndReluAndRequantize.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSignedSumAndReluAndRequantize.pbtxt new file mode 100644 index 0000000000..0cf52d6c89 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSignedSumAndReluAndRequantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSumAndRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSumAndRelu.pbtxt new file mode 100644 index 0000000000..e91a2b8dc0 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSumAndRelu.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DWithBiasSumAndRelu" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSumAndReluAndRequantize.pbtxt b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSumAndReluAndRequantize.pbtxt new file mode 100644 index 0000000000..fe3ec528bf --- /dev/null 
+++ b/tensorflow/core/api_def/base_api/api_def_QuantizedConv2DWithBiasSumAndReluAndRequantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedConv2DWithBiasSumAndReluAndRequantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt new file mode 100644 index 0000000000..8874a53ec1 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt @@ -0,0 +1,4 @@ +op { +graph_op_name: + "RequantizationRangePerChannel" visibility : HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt new file mode 100644 index 0000000000..0e4cd40a36 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt @@ -0,0 +1,4 @@ +op { +graph_op_name: + "RequantizePerChannel" visibility : HIDDEN +} diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 96536e6945..3a6c7119ff 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6016,7 +6016,9 @@ tf_kernel_library( tf_cc_test( name = "requantization_range_op_test", size = "small", - srcs = ["requantization_range_op_test.cc"], + srcs = [ + "requantization_range_op_test.cc", + ], deps = [ ":ops_testutil", ":ops_util", @@ -6893,6 +6895,67 @@ tf_mkl_kernel_library( deps = NN_DEPS + mkl_deps() + [":cwise_op"], ) +tf_mkl_kernel_library( + name = "mkl_requantize_ops", + srcs = [ + "mkl_requantization_range_per_channel_op.cc", + "mkl_requantize_per_channel_op.cc", + ], + hdrs = [ + "meta_support.h", + "no_op.h", + "reference_gemm.h", + ], + deps = if_mkl( + [ + ":concat_lib_hdrs", + ":conv_ops", + ":cwise_op", + ":eigen_helpers", + ":image_resizer_state", + ":ops_util", + ":pooling_ops", + ":quantization_utils", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:core_cpu", + 
"//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + "//third_party/eigen3", + "@gemmlowp", + ":transpose_functor", + "@mkl_dnn", + "//third_party/mkl:intel_binary_blob", + ], + ), +) + +tf_cc_test_mkl( + name = "mkl_requantize_ops_test", + size = "small", + srcs = ["mkl_requantize_ops_test.cc"], + deps = [ + ":mkl_requantize_ops", + ":ops_testutil", + ":ops_util", + ":quantization_utils", + ":quantized_ops", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cc_test_mkl( name = "mkl_fused_ops_test", size = "small", diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h index 10825f6962..84a1ccb4c2 100644 --- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h @@ -16,16 +16,18 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_KERNELS_MKL_QUANTIZED_CONV_OPS_H_ #define TENSORFLOW_CORE_KERNELS_MKL_QUANTIZED_CONV_OPS_H_ -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #ifdef INTEL_MKL namespace tensorflow { template float MklFloatForOneQuantizedLevel(float range_min, float range_max) { - const int64 highest = static_cast(Eigen::NumTraits::highest()); - const int64 lowest = static_cast(Eigen::NumTraits::lowest()); + int64 highest = static_cast(Eigen::NumTraits::highest()); + int64 lowest = static_cast(Eigen::NumTraits::lowest()); + if (lowest < -highest) lowest += 1; + const float float_for_one_quantized_level = (range_max - range_min) / (highest - lowest); return float_for_one_quantized_level; @@ -48,6 +50,35 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, *min_c = c_float_for_one_quant_level * c_lowest; *max_c = c_float_for_one_quant_level * c_highest; } + +template +void MklQuantizationRangeForMultiplication(float min_a, float max_a, + const Tensor& min_b_vector, + const Tensor& max_b_vector, + Tensor** min_c_vector, + Tensor** max_c_vector) { + CHECK(min_b_vector.NumElements() == (*min_c_vector)->NumElements()); + CHECK(max_b_vector.NumElements() == (*max_c_vector)->NumElements()); + size_t n_channel = min_b_vector.NumElements(); + const int64 c_highest = static_cast(Eigen::NumTraits::highest()); + const int64 c_lowest = static_cast(Eigen::NumTraits::lowest()); + const float* min_b = min_b_vector.flat().data(); + const float* max_b = max_b_vector.flat().data(); + float* min_c = (*min_c_vector)->flat().data(); + float* max_c = (*max_c_vector)->flat().data(); +#pragma omp parallel for + for (size_t n = 0; n < n_channel; n++) { + float a_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_a, max_a); + float b_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_b[n], max_b[n]); + float 
c_float_for_one_quant_level = + a_float_for_one_quant_level * b_float_for_one_quant_level; + min_c[n] = c_float_for_one_quant_level * c_lowest; + max_c[n] = c_float_for_one_quant_level * c_highest; + } +} + } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc new file mode 100644 index 0000000000..d23a2d320a --- /dev/null +++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc @@ -0,0 +1,110 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/array_ops.cc. 
+#ifdef INTEL_MKL +#define EIGEN_USE_THREADS + +#include +#include + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/no_op.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +class MklRequantizationRangePerChannelOp : public OpKernel { + public: + explicit MklRequantizationRangePerChannelOp(OpKernelConstruction* ctx) + : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("clip_value_max", &clip_value_max_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& input = ctx->input(kInputTensorIndex); + const Tensor& input_min = ctx->input(kInputMin); + const Tensor& input_max = ctx->input(kInputMax); + + size_t depth = input_max.NumElements(); + OP_REQUIRES(ctx, input_min.dim_size(0) == depth, + errors::InvalidArgument("min has incorrect size, expected ", + depth, " was ", input_min.dim_size(0))); + OP_REQUIRES(ctx, input_max.dim_size(0) == depth, + errors::InvalidArgument("max has incorrect size, expected ", + depth, " was ", input_max.dim_size(0))); + + const float* input_min_data = input_min.flat().data(); + const float* input_max_data = input_max.flat().data(); + std::vector ranges(depth); + bool is_non_negative = true; + Eigen::array shuffling({1, 0}); + auto input_matrix = input.flat_inner_dims(); + auto transposed_input = input_matrix.shuffle(shuffling); + +#pragma omp parallel for + for (size_t i = 0; i < depth; i++) { + Eigen::Tensor min = + transposed_input.chip<0>(i).minimum(); + Eigen::Tensor max = + transposed_input.chip<0>(i).maximum(); + int32_t min_per_channel = min(); + int32_t max_per_channel = max(); + int32_t abs_max = + 
std::max(std::abs(min_per_channel), std::abs(max_per_channel)); + float scale = + std::max(std::abs(input_min_data[i]), std::abs(input_max_data[i])); + ranges[i] = (scale * (float)abs_max / (float)(1L << 31)); + if (min_per_channel < 0) is_non_negative = false; + } + + float out_min_max = std::numeric_limits::min(); + for (size_t i = 0; i < depth; i++) { + if (out_min_max < ranges[i]) out_min_max = ranges[i]; + } + // Fixing max to clip_value_max_ (example 6.0 to support relu6) + if (out_min_max > clip_value_max_) out_min_max = clip_value_max_; + + Tensor* output_min = nullptr; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMin, {}, &output_min)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMax, {}, &output_max)); + output_min->flat()(0) = is_non_negative ? 0.0f : out_min_max * -1.0f; + output_max->flat()(0) = out_min_max; + } + + private: + float clip_value_max_ = std::numeric_limits::infinity(); + const int kInputTensorIndex = 0; + const int kInputMin = 1; + const int kInputMax = 2; + const int kOutputMin = 0; + const int kOutputMax = 1; +}; + +REGISTER_KERNEL_BUILDER(Name("RequantizationRangePerChannel") + .Device(DEVICE_CPU) + .TypeConstraint("T"), + MklRequantizationRangePerChannelOp); +} // namespace tensorflow +#endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl_requantize_ops_test.cc b/tensorflow/core/kernels/mkl_requantize_ops_test.cc new file mode 100644 index 0000000000..44ab936471 --- /dev/null +++ b/tensorflow/core/kernels/mkl_requantize_ops_test.cc @@ -0,0 +1,297 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +#include + +namespace tensorflow { + +class MklRequantizatedOpsTest : public OpsTestBase {}; + +class MklRequantizatedOpsTestHelper : public OpsTestBase { + public: + void Setup(Tensor &input_tensor_qint32, float &range_weights_ch1, + float &range_weights_ch2); + void TestBody(){}; +}; + +void MklRequantizatedOpsTestHelper::Setup(Tensor &input_tensor_qint32, + float &range_weights_ch1, + float &range_weights_ch2) { + // Step 1: Assumption of inputs + // ---------------------------- + // Assume input Tensor T (NHWC) in FP32 has range [0, 5.0] size nt*ht*wt*ct + // Assume input Filter W (NHWC) with 2 output channels of size nw*ht**wt*2 + // logically, Filter W has 2 channels W1 and W2 each of size nw*ht**wt*1 + // Assume input Filter W1(NHWC) in FP32 has range [-2.0, 2.0]size nw*ht**wt*1 + // Assume input Filter W2(NHWC) in FP32 has range 
[-3.0, 3.0]size nw*ht**wt*1 + + // Step 2: Assumption of Quantizing inputs and weights (per channel) + // ------------------------------------------------------------------ + // When these 2 Tensors, T and W are quantized using a Quantize Op. + // When the input Tensor T (NHWC) is quantized to unsigned int8. + // While the input Filter W (NHWC) is qunatized to signed int8. + // hence T max value is mapped to ((2^8-1) = 255) while W to ((2^7)-1 = 127)) + + // Range of Quantized T in int8[0 , 255] maps to orig T in FP32[0 , 5.0] + // Range of Quantized W1 in int8[-127, 127] maps to orig W1 in FP32[-2.0, 2.0] + // Range of Quantized W2 in int8[-127, 127] maps to orig W2 in FP32[-3.0, 3.0] + + // Hence the resolution of Quantized T will be 5.0/255 + // Hence the resolution of Quantized W1 will be 2.0/127 + // Hence the resolution of Quantized W2 will be 3.0/127 + + // Step 3: Assumption of quantizedconv on quantized input&weights(per channel) + // --------------------------------------------------------------------------- + // The input T and weights W1 (or W2) will be convolved (and multipled) + // The output Tensor T is in int32 whose range is [-2^31, 2^31] + // The Range of the Convolved T*W1 is 2^31 * 5.0/255 * 2.0/127 = 663110.59 + // So Range of Convolved T*W1 in int32[-2^31, 22^31] that maps to + // orig T Range in FP32[0,5.0] * [-2.0, 2.0] is [-663110.59, 663110.59] + + // The Range of the Convolved T*W2 is 2^31 * 5.0/255 * 3.0/127 = 994665.88 + // So Range of Convolved T*W2 in int32[-2^31, 22^31] that maps to + // orig T Range in FP32[0,5.0] * [-3.0, 3.0] is [-994665.88, 994665.88] + + // Step 4: Assumption output above is fed to Requantization_range_perchannel + // -------------------------------------------------------------------------- + // Here we recalculate the new Range for Convolved T*W so that we + // make good use in int8 qunatization from int32 to int8. 
+ + // We assume the above operations are performed and use these values above + // as ranges for Requantization_range_perchannel_op. + range_weights_ch1 = 663110.59; // For W1 channel + range_weights_ch2 = 994665.88; // For W2 Channel + + // We Fill the inputs Tensor T qint32 with arbitrary int32 values + test::FillValues( + &input_tensor_qint32, + {-1000, -2000, 2000, 4000, -3000, -6000, 4000, 8000, + 5000, 10000, -6000, -12000, 7000, 14000, 8000, 16000, + 9000, -18000, -10000, -20000, 11000, 22000, -12000, -24000, + 13000, 26000, 14000, 28000, -15000, -30000, 16000, 32000}); + + // Step 5: Define and run requantization_range_perchannel + // ------------------------------------------------------- + // See test RequantizationRangePerChannelTest_Basic and/or + // test RequantizationRangePerChannelTest_ClipMax +} + +// Following tests the RequantizationRangePerChannel Op wherein the range +// of the weights is calculated per channel. +TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_Basic) { + // Let us setup the tensor and inputs before we run this op. + float clip_max_value = pow(2, 31); + float range_weights_ch1 = 0.0; + float range_weights_ch2 = 0.0; + + // Create the input tensor + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + + // define and input tensor T shape. + Tensor input_tensor_qint32(DT_QINT32, + {1, input_height, input_width, input_channels}); + + // Explanation and setup prior to this Op. Fill T and populate range values. + MklRequantizatedOpsTestHelper helper; + helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2); + + // Step 5: Define and run requantization_range_perchannel + // ------------------------------------------------------- + // Define, Create and initalize the OP in question. 
+ TF_ASSERT_OK(NodeDefBuilder("requantization_range_per_channel", + "RequantizationRangePerChannel") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum::v()) + .Attr("clip_value_max", clip_max_value) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // Add the Input Nodes to the Op. + AddInputFromArray(input_tensor_qint32.shape(), + input_tensor_qint32.flat()); + + // Calculate the Min and max from the ranges + float ch1_min = -1.0 * range_weights_ch1; + float ch1_max = range_weights_ch1; + float ch2_min = -1.0 * range_weights_ch2; + float ch2_max = range_weights_ch2; + + // Add the Perchannel range Nodes to the Op. + AddInputFromArray(TensorShape({input_channels}), {ch1_min, ch2_min}); + AddInputFromArray(TensorShape({input_channels}), {ch1_max, ch2_max}); + + // Run the Kernel + TF_ASSERT_OK(RunOpKernel()); + + // Step 6: Verify Output and Store values to test Requantize_perchannel + // -------------------------------------------------------------------- + + // Verify the Expected Outputs + const float output_min = GetOutput(0)->flat()(0); + const float output_max = GetOutput(1)->flat()(0); + EXPECT_NEAR(-14.8217, output_min, 0.002); + EXPECT_NEAR(14.8217, output_max, 0.002); + + // output range is made use in RequantizePerChannelTest_Basic +} + +TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_ClipMax) { + // Let us setup the tensor and inputs before we run this op. + float clip_max_value = 6; // Can be used as 6 for Relu 6 activations. + float range_weights_ch1 = 0.0; + float range_weights_ch2 = 0.0; + + // Create the input tensor + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + + // define and input tensor T shape. + Tensor input_tensor_qint32(DT_QINT32, + {1, input_height, input_width, input_channels}); + + // Explanation and setup prior to this Op. Fill T and populate range values. 
+ MklRequantizatedOpsTestHelper helper; + helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2); + + // Step 5: Define and run requantization_range_perchannel + // ------------------------------------------------------- + // Define, Create and initalize the OP in question. + TF_ASSERT_OK(NodeDefBuilder("requantization_range_per_channel", + "RequantizationRangePerChannel") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum::v()) + .Attr("clip_value_max", clip_max_value) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // Add the Input Nodes to the Op. + AddInputFromArray(input_tensor_qint32.shape(), + input_tensor_qint32.flat()); + + // Calculate the Min and max from the ranges + float ch1_min = -1.0 * range_weights_ch1; + float ch1_max = range_weights_ch1; + float ch2_min = -1.0 * range_weights_ch2; + float ch2_max = range_weights_ch2; + + // Add the Perchannel range Nodes to the Op. + AddInputFromArray(TensorShape({input_channels}), {ch1_min, ch2_min}); + AddInputFromArray(TensorShape({input_channels}), {ch1_max, ch2_max}); + + // Run the Kernel + TF_ASSERT_OK(RunOpKernel()); + + // Step 6: Verify Output and Store values to test Requantize_perchannel + // -------------------------------------------------------------------- + + // Verify the Expected Outputs + const float output_min = GetOutput(0)->flat()(0); + const float output_max = GetOutput(1)->flat()(0); + EXPECT_NEAR(-6.0, output_min, 0.002); // Values are Max as with clip_value + EXPECT_NEAR(6.0, output_max, 0.002); // Values are Max as with clip_value +} + +TEST_F(MklRequantizatedOpsTest, RequantizePerChannelTest_Basic) { + // Let us setup the tensor and inputs before we run this op. 
+ float clip_max_value = pow(2, 31); + float range_weights_ch1 = 0.0; + float range_weights_ch2 = 0.0; + + // Create the input tensor + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + + // define an input tensor T shape. + Tensor input_tensor_qint32(DT_QINT32, + {1, input_height, input_width, input_channels}); + + // Explanation and setup prior to this Op. Fill T and populate range values. + MklRequantizatedOpsTestHelper helper; + helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2); + + // Step 7: Define and run requantize_perchannel + // -------------------------------------------- + // The Output of Requantization_range_op_per_channel which calculated the + // new ranges of int8 is fed to the requantize per channel op. + // Here the Values of Convolved T*W is converted from int32 to int8. + + TF_ASSERT_OK(NodeDefBuilder("requantize_per_channel", "RequantizePerChannel") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum::v()) + .Attr("out_type", DataTypeToEnum::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // Add the Input Nodes to the Op. + AddInputFromArray(input_tensor_qint32.shape(), + input_tensor_qint32.flat()); + + // Calculate the Min and max from the ranges + float ch1_min = -1.0 * range_weights_ch1; + float ch1_max = range_weights_ch1; + float ch2_min = -1.0 * range_weights_ch2; + float ch2_max = range_weights_ch2; + + // Add the Perchannel range Nodes to the Op. + AddInputFromArray(TensorShape({input_channels}), {ch1_min, ch2_min}); + AddInputFromArray(TensorShape({input_channels}), {ch1_max, ch2_max}); + + // Calculate the Min and max from Step 6 above + // in RequantizationRangePerChannelTest_Basic + float range_op_output_min = -14.8217; + float range_op_output_max = 14.8217; + + // Add the Requested_min and requested_max stored from Step 6. 
+ AddInputFromArray(TensorShape({1}), {range_op_output_min}); + AddInputFromArray(TensorShape({1}), {range_op_output_max}); + + // Run the kernel + TF_ASSERT_OK(RunOpKernel()); + + // Verify the output with the expected output + Tensor output = *GetOutput(0); + const float output_min = GetOutput(1)->flat()(0); + const float output_max = GetOutput(2)->flat()(0); + EXPECT_NEAR(range_op_output_min, output_min, 0.002); + EXPECT_NEAR(range_op_output_max, output_max, 0.002); +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc new file mode 100644 index 0000000000..f9aa550866 --- /dev/null +++ b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc @@ -0,0 +1,171 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/array_ops.cc. 
+ +#define EIGEN_USE_THREADS +#ifdef INTEL_MKL +#include + +#include "mkldnn.hpp" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/no_op.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template +class MklRequantizePerChannelOp : public OpKernel { + public: + explicit MklRequantizePerChannelOp(OpKernelConstruction* ctx) + : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_type_)); + OP_REQUIRES(ctx, out_type_ == DT_QINT8 || out_type_ == DT_QUINT8, + errors::InvalidArgument( + "out_type must be qint8 or quint8, but got: " + out_type_)); + } + virtual ~MklRequantizePerChannelOp() {} + void Compute(OpKernelContext* ctx) override { + try { + const Tensor& input = ctx->input(kInputTensorIndex); + const Tensor& input_min_vec = ctx->input(kInputMinVec); + float* input_min_vec_data = (float*)const_cast( + static_cast(input_min_vec.flat().data())); + const Tensor& input_max_vec = ctx->input(kInputMaxVec); + float* input_max_vec_data = (float*)const_cast( + static_cast(input_max_vec.flat().data())); + + const Tensor& input_requested_min = ctx->input(this->kRequestMin); + const float input_requested_min_float = + input_requested_min.flat()(0); + const Tensor& input_requested_max = ctx->input(this->kRequestMax); + const float input_requested_max_float = + input_requested_max.flat()(0); + + size_t depth = input_min_vec.NumElements(); + OP_REQUIRES( + ctx, input_min_vec.dim_size(0) == depth, + errors::InvalidArgument("min has incorrect size, expected ", depth, + " was ", input_min_vec.dim_size(0))); + OP_REQUIRES( + ctx, input_max_vec.dim_size(0) == depth, + 
errors::InvalidArgument("max has incorrect size, expected ", depth, + " was ", input_max_vec.dim_size(0))); + + if (out_type_ == DT_QINT8) CHECK(input_requested_min_float < 0.0f); + + const float factor = (out_type_ == DT_QINT8) ? 127.0f : 255.0f; + float requested_min_max = std::max(std::abs(input_requested_min_float), + std::abs(input_requested_max_float)); + Tensor* output = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputTensorIndex, + input.shape(), &output)); + + std::vector scales(depth); + for (int i = 0; i < depth; i++) { + float min_max_from_vec = std::max(std::abs(input_min_vec_data[i]), + std::abs(input_max_vec_data[i])); + float scale = + factor * (min_max_from_vec / requested_min_max / (float)(1L << 31)); + scales[i] = scale; + } + + mkldnn::primitive_attr reorder_attr; + reorder_attr.set_output_scales(2, scales); + + memory::dims dims_mkl_order = + TFShapeToMklDnnDimsInNCHW(input.shape(), FORMAT_NHWC); + memory::desc input_md = memory::desc(dims_mkl_order, MklDnnType(), + memory::format::nhwc); + memory::desc output_md = + (out_type_ == DT_QINT8) + ? 
memory::desc(dims_mkl_order, MklDnnType(), + memory::format::nhwc) + : memory::desc(dims_mkl_order, MklDnnType(), + memory::format::nhwc); + + memory::primitive_desc input_pd = + memory::primitive_desc(input_md, cpu_engine_); + memory::primitive_desc output_pd = + memory::primitive_desc(output_md, cpu_engine_); + + void* input_buf = + static_cast(const_cast(input.flat().data())); + void* output_buf; + if (out_type_ == DT_QINT8) { + output_buf = static_cast( + const_cast(output->flat().data())); + } else { + output_buf = static_cast( + const_cast(output->flat().data())); + } + + std::unique_ptr input_mem_prim_(new memory(input_pd, input_buf)); + std::unique_ptr output_mem_prim_( + new memory(output_pd, output_buf)); + + mkldnn::reorder::primitive_desc reorder_pd = + mkldnn::reorder::primitive_desc(input_pd, output_pd, reorder_attr); + std::vector net; + net.push_back( + mkldnn::reorder(reorder_pd, *input_mem_prim_, *output_mem_prim_)); + stream(stream::kind::eager).submit(net).wait(); + + Tensor* output_min = nullptr; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMin, {}, &output_min)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMax, {}, &output_max)); + + output_min->flat()(0) = input_requested_min_float; + output_max->flat()(0) = input_requested_max_float; + + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + std::string(e.message) + ", in file " + + std::string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + ctx, errors::Aborted("Operation received an exception:", error_msg)); + } + } + + private: + const int kInputTensorIndex = 0; + const int kInputMinVec = 1; + const int kInputMaxVec = 2; + const int kRequestMin = 3; + const int kRequestMax = 4; + const int kOutputTensorIndex = 0; + const int kOutputMin = 1; + const int kOutputMax = 2; + DataType out_type_; + engine cpu_engine_ = engine(engine::cpu, 0); +}; + 
+REGISTER_KERNEL_BUILDER(Name("RequantizePerChannel") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .TypeConstraint("out_type"), + MklRequantizePerChannelOp); + +} // namespace tensorflow +#endif // INTEL_MKL diff --git a/tensorflow/core/kernels/requantize.cc b/tensorflow/core/kernels/requantize.cc index dce6f1a185..ab057d5a67 100644 --- a/tensorflow/core/kernels/requantize.cc +++ b/tensorflow/core/kernels/requantize.cc @@ -19,7 +19,6 @@ limitations under the License. #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/type_traits.h" @@ -27,6 +26,7 @@ limitations under the License. #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { @@ -100,4 +100,10 @@ REGISTER_KERNEL_BUILDER(Name("Requantize") .TypeConstraint("out_type"), RequantizeOp); +REGISTER_KERNEL_BUILDER(Name("Requantize") + .Device(DEVICE_CPU) + .TypeConstraint("Tinput") + .TypeConstraint("out_type"), + RequantizeOp); + } // namespace tensorflow diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index eb9cbd3225..52243f2fcf 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1247,12 +1247,12 @@ Status RangeSize(const Tensor* start_t, const Tensor* limit_t, T limit = limit_t->scalar()(); T delta = delta_t->scalar()(); if (start > limit && delta > 0) { - return errors::InvalidArgument( - "Requires start <= limit when delta > 0: ", start, "/", limit); + return errors::InvalidArgument("Requires start <= limit when delta > 0: ", + start, "/", limit); } if (start < limit && delta < 0) { - return errors::InvalidArgument( - "Requires start >= limit when delta < 0: ", start, "/", limit); + return 
errors::InvalidArgument("Requires start >= limit when delta < 0: ", + start, "/", limit); } if (delta == 0) { return errors::InvalidArgument("Requires delta != 0"); @@ -1686,6 +1686,45 @@ Add two input tensors element wise using mkl kernel sum. inputs: Must all be the same size and shape. )doc"); +REGISTER_OP("RequantizePerChannel") + .Input("input: T") + .Input("input_min: float") + .Input("input_max: float") + .Input("requested_output_min: float") + .Input("requested_output_max: float") + .Output("output: out_type") + .Output("output_min: float") + .Output("output_max: float") + .Attr("T: quantizedtype = DT_QINT32") + .Attr("out_type: quantizedtype = DT_QUINT8") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }); +REGISTER_OP("RequantizationRangePerChannel") + .Input("input: T") + .Input("input_min: float") + .Input("input_max: float") + .Output("output_min: float") + .Output("output_max: float") + .Attr("T: quantizedtype = DT_QINT32") + .Attr("clip_value_max: float") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); + c->set_output(0, c->Scalar()); + c->set_output(1, c->Scalar()); + return Status::OK(); + }); + #endif // INTEL_MKL REGISTER_OP("NextAfter") -- GitLab From 224b3e32b0ac552a0c2a083c803f60814efa850b Mon Sep 17 00:00:00 2001 From: Albin Joy Date: Mon, 21 Jan 2019 14:32:21 +0530 Subject: [PATCH 0018/1185] Removed twice declaration AttrTypeByName The interface AttrTypeByName was declared twice in attr_builder.h. 
--- tensorflow/core/common_runtime/eager/attr_builder.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/attr_builder.h b/tensorflow/core/common_runtime/eager/attr_builder.h index aa64b5f59b..1b3fbcbd4a 100644 --- a/tensorflow/core/common_runtime/eager/attr_builder.h +++ b/tensorflow/core/common_runtime/eager/attr_builder.h @@ -54,10 +54,6 @@ Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out, Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, TF_AttrType* out, unsigned char* is_list); -// Looks for 'attr_name' in 'm' and sets 'out' and 'is_list'. -Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, - TF_AttrType* out, unsigned char* is_list); - // KernelAndDevice::Init needs a NodeDef only to pass the attribute map through. // An AttrBuilder is a convenience class to help with that - providing a smaller // interface than NodeDefBuilder and avoiding expensive (unnecessary?) sanity -- GitLab From 9883edb26c9d7293dac2c2977d6606f3a0cb3e5d Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Wed, 23 Jan 2019 13:33:40 -0800 Subject: [PATCH 0019/1185] Adding support for MKL Quantized Concat --- tensorflow/core/BUILD | 14 +- tensorflow/core/api_def/excluded_ops.cc | 6 +- tensorflow/core/framework/common_shape_fns.cc | 20 +- tensorflow/core/framework/common_shape_fns.h | 2 + tensorflow/core/kernels/BUILD | 24 ++ tensorflow/core/kernels/mkl_concat_op.cc | 103 ++++++-- .../kernels/mkl_quantized_concat_op_test.cc | 229 ++++++++++++++++++ .../core/kernels/quantized_concat_op.cc | 25 +- tensorflow/core/ops/mkl_array_ops.cc | 92 +++++++ 9 files changed, 480 insertions(+), 35 deletions(-) create mode 100644 tensorflow/core/kernels/mkl_quantized_concat_op_test.cc create mode 100644 tensorflow/core/ops/mkl_array_ops.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index ceec270f99..2966a8fcdd 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1138,6 +1138,13 
@@ tf_gen_op_libs( deps = [":protos_all_cc"], ) +tf_gen_op_libs( + op_lib_names = [ + "mkl_array_ops", + ], + deps = [":protos_all_cc"], +) + tf_gen_op_libs( op_lib_names = [ "audio_ops", @@ -1277,7 +1284,10 @@ cc_library( ":training_ops_op_lib", ":user_ops_op_lib", ":word2vec_ops", - ] + if_mkl([":mkl_nn_ops_op_lib"]) + tf_additional_cloud_op_deps(), + ] + if_mkl([ + ":mkl_array_ops_op_lib", + ":mkl_nn_ops_op_lib", + ]) + tf_additional_cloud_op_deps(), alwayslink = 1, ) @@ -4472,7 +4482,7 @@ tf_cc_test( "//tensorflow/cc:scope", "//tensorflow/core/kernels:cwise_op", "//third_party/eigen3", - ], + ] + if_mkl([":mkl_array_ops_op_lib"]), ) tf_cc_test( diff --git a/tensorflow/core/api_def/excluded_ops.cc b/tensorflow/core/api_def/excluded_ops.cc index 02026e94ab..65d2102ac8 100644 --- a/tensorflow/core/api_def/excluded_ops.cc +++ b/tensorflow/core/api_def/excluded_ops.cc @@ -24,9 +24,9 @@ const std::unordered_set* GetExcludedOps() { "GcsConfigureBlockCache", "GcsConfigureCredentials", #ifdef INTEL_MKL // QuantizedFusedOps for Intel CPU - "QuantizedConv2DAndRequantize", "QuantizedConv2DWithBias", - "QuantizedConv2DWithBiasAndRequantize", "QuantizedConv2DAndRelu", - "QuantizedConv2DAndReluAndRequantize", + "QuantizedConcatV2", "QuantizedConv2DAndRequantize", + "QuantizedConv2DWithBias", "QuantizedConv2DWithBiasAndRequantize", + "QuantizedConv2DAndRelu", "QuantizedConv2DAndReluAndRequantize", "QuantizedConv2DWithBiasAndRelu", "QuantizedConv2DWithBiasAndReluAndRequantize", "QuantizedConv2DWithBiasSumAndRelu", diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index 876ac188ac..b168a7207f 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -57,9 +57,8 @@ Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size, if (*output_size < 0) { return errors::InvalidArgument( "Computed output size would be negative: ", *output_size, - " [input_size: 
", input_size, - ", effective_filter_size: ", effective_filter_size, - ", stride: ", stride, "]"); + " [input_size: ", input_size, ", effective_filter_size: ", + effective_filter_size, ", stride: ", stride, "]"); } return Status::OK(); } @@ -1299,6 +1298,12 @@ Status ConcatV2Shape(InferenceContext* c) { c->num_inputs() - 1 /* dim_index */); } +Status QuantizedConcatV2Shape(InferenceContext* c, int num_inputs_to_concat) { + return ConcatShapeHelper(c, 0 /* start_value_index */, + num_inputs_to_concat /* end_value_index */, + num_inputs_to_concat /* dim_index */); +} + Status BroadcastBinaryOpOutputShapeFnHelper(InferenceContext* c, ShapeHandle shape_x, ShapeHandle shape_y, @@ -1562,11 +1567,10 @@ Status ScatterNdUpdateShape(InferenceContext* c) { Status s = c->Merge(prefix_indices, prefix_updates, &unused); if (!s.ok()) { return errors::InvalidArgument( - "The outer ", num_outer_dims, - " dimensions of indices.shape=", c->DebugString(indices_shape), - " must match the outer ", num_outer_dims, - " dimensions of updates.shape=", c->DebugString(updates_shape), - ": ", s.error_message()); + "The outer ", num_outer_dims, " dimensions of indices.shape=", + c->DebugString(indices_shape), " must match the outer ", + num_outer_dims, " dimensions of updates.shape=", + c->DebugString(updates_shape), ": ", s.error_message()); } ShapeHandle input_suffix; diff --git a/tensorflow/core/framework/common_shape_fns.h b/tensorflow/core/framework/common_shape_fns.h index 14b9688bdc..d421844ee6 100644 --- a/tensorflow/core/framework/common_shape_fns.h +++ b/tensorflow/core/framework/common_shape_fns.h @@ -279,6 +279,8 @@ Status ConcatShape(shape_inference::InferenceContext* c, // Shape function for concat operations. Status ConcatV2Shape(shape_inference::InferenceContext* c); +Status QuantizedConcatV2Shape(InferenceContext* c, int num_inputs_to_concat); + // Shape function for binary operators that broadcast their inputs // and with output to output_index. // Note: out cannot be NULL. 
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index ce6504e0b6..85a5c990d4 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6570,6 +6570,30 @@ tf_cc_test( ], ) +tf_cc_test_mkl( + name = "mkl_quantized_concat_op_test", + size = "small", + srcs = ["mkl_quantized_concat_op_test.cc"], + deps = [ + ":mkl_concat_op", + ":ops_testutil", + ":ops_util", + ":quantization_utils", + ":quantized_ops", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:mkl_array_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cc_test( name = "quantized_batch_norm_op_test", size = "small", diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 3a5c87485c..a955a90990 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -17,7 +17,6 @@ limitations under the License. #include #include "mkldnn.hpp" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -25,12 +24,15 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/concat_lib.h" +#include "tensorflow/core/kernels/concat_lib_cpu.h" +#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" using mkldnn::concat; using mkldnn::stream; -#include "tensorflow/core/util/mkl_util.h" namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -78,9 +80,8 @@ class EigenConcatBaseOp : public OpKernel { const TensorShape& input_shape = input_shapes[0]; int32 axis = concat_dim < 0 ? concat_dim + input_dims : concat_dim; - OP_REQUIRES(c, - (0 <= axis && axis < input_dims) || - (allow_legacy_scalars() && concat_dim == 0), + OP_REQUIRES(c, (0 <= axis && axis < input_dims) || + (allow_legacy_scalars() && concat_dim == 0), errors::InvalidArgument( "ConcatOp : Expected concatenating dimensions in the range " "[", @@ -102,13 +103,12 @@ class EigenConcatBaseOp : public OpKernel { const auto in = values[i]; const bool in_is_scalar = IsLegacyScalar(input_shapes[i]); OP_REQUIRES( - c, - (input_shapes[i].dims() == input_dims) || - (input_is_scalar && in_is_scalar), + c, (input_shapes[i].dims() == input_dims) || + (input_is_scalar && in_is_scalar), errors::InvalidArgument( "ConcatOp : Ranks of all input tensors should match: shape[0] = ", - input_shape.DebugString(), " vs. shape[", i, - "] = ", input_shapes[i].DebugString())); + input_shape.DebugString(), " vs. shape[", i, "] = ", + input_shapes[i].DebugString())); if (in.NumElements() > 0) { int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0; inputs_flat.emplace_back(new typename TTypes::ConstMatrix( @@ -226,9 +226,53 @@ class MklConcatOp : public OpKernel { // format and avoid calling eigen version. 
if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true; + OpInputList input_mins, input_maxes; + if (std::is_same::value || std::is_same::value) { + // MKL DNN concat does not support input tensors that have different + // ranges; check whether the ranges of all input tensors are the same, + // and if not, forward it to the Eigen implementation. + + OP_REQUIRES_OK(context, context->input_list("input_mins", &input_mins)); + OP_REQUIRES(context, (input_mins.size() == N), + errors::InvalidArgument( + "QuantizedConcatOp : Expected mins input list length ", + input_mins.size(), " to equal values length ", N)); + + OP_REQUIRES_OK(context, + context->input_list("input_maxes", &input_maxes)); + OP_REQUIRES(context, (input_maxes.size() == N), + errors::InvalidArgument( + "QuantizedConcatOp : Expected maxes input list length ", + input_maxes.size(), " to equal values length ", N)); + float input_min = input_mins[0].flat()(0); + float input_max = input_maxes[0].flat()(0); + const float eps = 1.0e-6; + for (int i = 1; i < N; i++) { + float min = input_mins[i].flat()(0); + float max = input_maxes[i].flat()(0); + + if (fabs(input_min - min) > eps || fabs(input_max - max) > eps) { + invoke_eigen = true; + break; + } + } + } + // Call Eigen library if (invoke_eigen) { - CallEigenVersion(context, input_tensors, mkl_input_shapes); + if (std::is_same::value || std::is_same::value) { + // MKL DNN quantized concat does not support input tensors with + // different ranges. + // TODO (mabuzain): Add quantized version of CallEigen() to support + // this case.
+ OP_REQUIRES(context, false, + errors::Unimplemented("MKL DNN quantized concat does not " + "support input tensors that have " + "different ranges")); + } else { + CallEigenVersion(context, input_tensors, mkl_input_shapes); + } + return; } @@ -374,10 +418,27 @@ class MklConcatOp : public OpKernel { std::vector net; net.push_back(concat_op); stream(stream::kind::eager).submit(net).wait(); + + // For quantized concat, min and max outputs are also computed. + if (std::is_same::value || std::is_same::value) { + Tensor* output_min = nullptr; + Tensor* output_max = nullptr; + MklDnnShape output_min_mkl_shape, output_max_mkl_shape; + output_min_mkl_shape.SetMklTensor(false); + output_max_mkl_shape.SetMklTensor(false); + AllocateOutputSetMklShape(context, 1, &output_min, {}, + output_min_mkl_shape); + AllocateOutputSetMklShape(context, 2, &output_max, {}, + output_max_mkl_shape); + // All input tensors should have the same range, just use the + // first one + output_min->flat()(0) = input_mins[0].flat()(0); + output_max->flat()(0) = input_maxes[0].flat()(0); + } } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); OP_REQUIRES_OK( context, errors::Aborted("Operation received an exception:", error_msg)); @@ -490,6 +551,20 @@ class MklConcatOp : public OpKernel { TF_CALL_float(REGISTER_MKL_CPU); +REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConcatV2") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .HostMemory("axis") + .Label(mkl_op_registry::kMklQuantizedOpLabel), + MklConcatOp) + +REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConcatV2") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .HostMemory("axis") + .Label(mkl_op_registry::kMklQuantizedOpLabel), + MklConcatOp) + #undef 
REGISTER_CONCAT_MKL } // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc b/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc new file mode 100644 index 0000000000..f9098f960a --- /dev/null +++ b/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc @@ -0,0 +1,229 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include +#include +#include + +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/quantization_utils.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +namespace tensorflow { + +using test::graph::Constant; + 
+// Helper class for converting MKL tensors to TF tensors and comparing to +// expected values + +static const uint8 dummy_tensor[] = {0, 0, 0, 0, 0, 0, 0, 0}; +static const TensorShape dummy_shape({8}); + +class ConvMklToTF : public OpsTestBase { + public: + template + void ConvertMKL2TF(DataType dtype, const Tensor& first, const Tensor& second, + Tensor& output) { + // Create an MKL to TF conversion node and execute it + TF_EXPECT_OK(NodeDefBuilder("mkl_to_tf_op", "_MklToTf") + .Input(FakeInput(dtype)) // Input + .Input(FakeInput(DT_UINT8)) // Mkl second tensor + .Attr("T", dtype) + .Attr("_kernel", "MklOp") + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + AddInputFromArray(first.shape(), first.flat()); + AddInputFromArray(second.shape(), second.flat()); + TF_ASSERT_OK(RunOpKernel()); + + output = *GetOutput(0); + } + void TestBody(){}; +}; + +class QuantizedConcatTest : public OpsTestBase { + protected: + QuantizedConcatTest() {} + + void TestSmall8Bit(float first_min, float first_max, float second_min, + float second_max); + void TestSecondDim8Bit(float first_min, float first_max, float second_min, + float second_max); +}; + +TEST_F(QuantizedConcatTest, Small8BitSameRange) { + // Range for both is the same, so impl can use memcpy.
+ TestSmall8Bit(0.0f, 255.0f, 0.0f, 255.0f); +} + +void QuantizedConcatTest::TestSmall8Bit(float first_min, float first_max, + float second_min, float second_max) { + TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "_MklQuantizedConcatV2") + .Input(FakeInput(2, DT_QUINT8)) + .Input(FakeInput(DT_INT32)) + .Input(FakeInput(2, DT_FLOAT)) + .Input(FakeInput(2, DT_FLOAT)) + .Input(FakeInput(2, DT_UINT8)) // MKl second tensor + .Input(FakeInput(DT_UINT8)) // MKl second tensor + .Input(FakeInput(2, DT_UINT8)) // MKl second tensor + .Input(FakeInput(2, DT_UINT8)) // MKl second tensor + .Attr("N", 2) + .Attr("T", DataTypeToEnum::v()) + .Attr("Tidx", DT_INT32) + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const int first_batch = 2; + const int first_height = 2; + const int first_width = 3; + const int first_depth = 1; + Tensor first_float(DT_FLOAT, + {first_batch, first_height, first_width, first_depth}); + test::FillValues(&first_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor first_quantized = + FloatTensorToQuantized(first_float, first_min, first_max); + + const int second_batch = 2; + const int second_height = 2; + const int second_width = 3; + const int second_depth = 1; + Tensor second_float( + DT_FLOAT, {second_batch, second_height, second_width, second_depth}); + test::FillValues(&second_float, + {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); + Tensor second_quantized = + FloatTensorToQuantized(second_float, second_min, second_max); + + const int expected_batch = first_batch + second_batch; + Tensor expected_float( + DT_FLOAT, {expected_batch, first_height, first_width, first_depth}); + test::FillValues(&expected_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); + + AddInputFromArray(first_quantized.shape(), + first_quantized.flat()); + AddInputFromArray(second_quantized.shape(), + second_quantized.flat()); + AddInputFromArray(TensorShape({}), {0}); + 
AddInputFromArray(TensorShape({}), {first_min}); + AddInputFromArray(TensorShape({}), {second_min}); + AddInputFromArray(TensorShape({}), {first_max}); + AddInputFromArray(TensorShape({}), {second_max}); + AddInputFromArray(dummy_shape, dummy_tensor); + AddInputFromArray(dummy_shape, dummy_tensor); + AddInputFromArray(dummy_shape, dummy_tensor); + AddInputFromArray(dummy_shape, dummy_tensor); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat()(0); + const float output_max = GetOutput(2)->flat()(0); + Tensor output_float = + QuantizedTensorToFloat(output_quantized, output_min, output_max); + test::ExpectTensorNear(expected_float, output_float, 0.2); +} + +TEST_F(QuantizedConcatTest, SecondDim8BitSameRange) { + TestSecondDim8Bit(-10.0f, 150.0f, -10.0f, 150.0f); +} + +void QuantizedConcatTest::TestSecondDim8Bit(float first_min, float first_max, + float second_min, + float second_max) { + TF_ASSERT_OK(NodeDefBuilder("quantized_concat_op", "_MklQuantizedConcatV2") + .Input(FakeInput(2, DT_QUINT8)) + .Input(FakeInput(DT_INT32)) + .Input(FakeInput(2, DT_FLOAT)) + .Input(FakeInput(2, DT_FLOAT)) + .Input(FakeInput(2, DT_UINT8)) // MKl second tensor + .Input(FakeInput(DT_UINT8)) // MKl second tensor + .Input(FakeInput(2, DT_UINT8)) // MKl second tensor + .Input(FakeInput(2, DT_UINT8)) // MKl second tensor + .Attr("N", 2) + .Attr("T", DataTypeToEnum::v()) + .Attr("Tidx", DT_INT32) + .Attr("_kernel", "QuantizedMklOp") + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + const int first_batch = 2; + const int first_height = 2; + const int first_width = 3; + const int first_depth = 1; + Tensor first_float(DT_FLOAT, + {first_batch, first_height, first_width, first_depth}); + test::FillValues(&first_float, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + Tensor first_quantized = + FloatTensorToQuantized(first_float, first_min, first_max); + + const int second_batch = 2; + const int second_height = 2; + 
const int second_width = 3; + const int second_depth = 1; + + Tensor second_float( + DT_FLOAT, {second_batch, second_height, second_width, second_depth}); + test::FillValues(&second_float, + {13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}); + Tensor second_quantized = + FloatTensorToQuantized(second_float, second_min, second_max); + + const int expected_height = first_height + second_height; + Tensor expected_float( + DT_FLOAT, {first_batch, expected_height, first_width, first_depth}); + test::FillValues(&expected_float, + {1, 2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18, + 7, 8, 9, 10, 11, 12, 19, 20, 21, 22, 23, 24}); + + AddInputFromArray(first_quantized.shape(), + first_quantized.flat()); + AddInputFromArray(second_quantized.shape(), + second_quantized.flat()); + AddInputFromArray(TensorShape({}), {1}); + AddInputFromArray(TensorShape({}), {first_min}); + AddInputFromArray(TensorShape({}), {second_min}); + AddInputFromArray(TensorShape({}), {first_max}); + AddInputFromArray(TensorShape({}), {second_max}); + AddInputFromArray(dummy_shape, dummy_tensor); + AddInputFromArray(dummy_shape, dummy_tensor); + AddInputFromArray(dummy_shape, dummy_tensor); + AddInputFromArray(dummy_shape, dummy_tensor); + TF_ASSERT_OK(RunOpKernel()); + const Tensor& output_quantized = *GetOutput(0); + const float output_min = GetOutput(1)->flat()(0); + const float output_max = GetOutput(2)->flat()(0); + Tensor output_float = + QuantizedTensorToFloat(output_quantized, output_min, output_max); + test::ExpectTensorNear(expected_float, output_float, 1.0); +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/quantized_concat_op.cc b/tensorflow/core/kernels/quantized_concat_op.cc index b03ac8e87d..1715849011 100644 --- a/tensorflow/core/kernels/quantized_concat_op.cc +++ b/tensorflow/core/kernels/quantized_concat_op.cc @@ -17,13 +17,13 @@ limitations under the License. 
#include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/concat_lib_cpu.h" #include "tensorflow/core/kernels/quantization_utils.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { @@ -135,8 +135,8 @@ class QuantizedConcatOp : public OpKernel { context, in.dims() == input_dims || (input_is_scalar && in_is_scalar), errors::InvalidArgument( "ConcatOp : Ranks of all input tensors should match: shape[0] = ", - input_shape.DebugString(), " vs. shape[", i, - "] = ", in.shape().DebugString())); + input_shape.DebugString(), " vs. shape[", i, "] = ", + in.shape().DebugString())); for (int j = 0; j < input_dims; ++j) { if (j == concat_dim) { continue; @@ -145,8 +145,8 @@ class QuantizedConcatOp : public OpKernel { context, in.dim_size(j) == input_shape.dim_size(j), errors::InvalidArgument( "ConcatOp : Dimensions of inputs should match: shape[0] = ", - input_shape.DebugString(), " vs. shape[", i, - "] = ", in.shape().DebugString())); + input_shape.DebugString(), " vs. 
shape[", i, "] = ", + in.shape().DebugString())); } if (in.NumElements() > 0) { int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0; @@ -184,9 +184,8 @@ class QuantizedConcatOp : public OpKernel { const int input_dims = values[0].dims(); const TensorShape& input_shape = values[0].shape(); OP_REQUIRES( - context, - (0 <= concat_dim && concat_dim < input_dims) || - (allow_legacy_scalars() && concat_dim == 0), + context, (0 <= concat_dim && concat_dim < input_dims) || + (allow_legacy_scalars() && concat_dim == 0), errors::InvalidArgument( "ConcatOp : Expected concatenating dimensions in the range [", 0, ", ", input_dims, "), but got ", concat_dim)); @@ -246,4 +245,14 @@ REGISTER_QUANTIZED_CONCAT(qint32); #undef REGISTER_QUANTIZED_CONCAT +#define REGISTER_QUANTIZED_CONCATV2(type) \ + REGISTER_KERNEL_BUILDER(Name("QuantizedConcatV2") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .HostMemory("axis"), \ + QuantizedConcatOp) + +REGISTER_QUANTIZED_CONCATV2(quint8); +REGISTER_QUANTIZED_CONCATV2(qint32); + } // namespace tensorflow diff --git a/tensorflow/core/ops/mkl_array_ops.cc b/tensorflow/core/ops/mkl_array_ops.cc new file mode 100644 index 0000000000..ad15d0bf88 --- /dev/null +++ b/tensorflow/core/ops/mkl_array_ops.cc @@ -0,0 +1,92 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifdef INTEL_MKL + +// This file contains the registration of MKL-DNN array ops. + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/util/mirror_pad_mode.h" +#include "tensorflow/core/util/padding.h" +#include "tensorflow/core/util/strided_slice_op.h" +#include "tensorflow/core/util/tensor_format.h" + +namespace tensorflow { + +using shape_inference::DimensionHandle; +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; +using shape_inference::UnchangedShape; + +// Adding QuantizedConcatV2 which is similar to the existing QuantizedConcat +// op to be able to replace it by _MklQuantizedConcatV2 in the graph rewrite. +REGISTER_OP("QuantizedConcatV2") + .Input("values: N * T") + .Input("axis: Tidx") + .Input("input_mins: N * float32") + .Input("input_maxes: N * float32") + .Output("output: T") + .Output("output_min: float") + .Output("output_max: float") + .Attr("N: int >= 2") + .Attr("T: type") + .Attr("Tidx: {int32, int64} = DT_INT32") + .SetShapeFn([](InferenceContext* c) { + const int n = (c->num_inputs() - 1) / 3; + TF_RETURN_IF_ERROR(shape_inference::QuantizedConcatV2Shape(c, n)); + ShapeHandle unused; + for (int i = n + 1; i < c->num_inputs(); ++i) { + TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused)); + } + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }); + +REGISTER_OP("_MklQuantizedConcatV2") + .Input("values: N * T") + .Input("axis: Tidx") + .Input("input_mins: N * float32") + .Input("input_maxes: N * float32") + .Input("mkl_values: N * uint8") + .Input("mkl_axis: uint8") + .Input("mkl_input_mins: N * uint8") + .Input("mkl_input_maxes: N * uint8") + .Output("output: T") + .Output("output_min: float") + 
.Output("output_max: float") + .Output("mkl_output: uint8") + .Output("mkl_output_min: uint8") + .Output("mkl_output_max: uint8") + .Attr("N: int >= 2") + .Attr("T: type") + .Attr("Tidx: {int32, int64} = DT_INT32") + .SetShapeFn([](InferenceContext* c) { + const int n = (c->num_inputs() / 2 - 1) / 3; + TF_RETURN_IF_ERROR(shape_inference::QuantizedConcatV2Shape(c, n)); + ShapeHandle unused; + for (int i = n + 1; i < c->num_inputs() / 2; ++i) { + TF_RETURN_IF_ERROR(c->WithRank(c->input(i), 0, &unused)); + } + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }); +} + +#endif \ No newline at end of file -- GitLab From ba08b73076b853c028500969f4eb733f190e9cac Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Wed, 23 Jan 2019 16:48:59 -0800 Subject: [PATCH 0020/1185] Update profiler ui documentation. --- tensorflow/core/profiler/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md index 57d76eb4cb..17c0268cec 100644 --- a/tensorflow/core/profiler/README.md +++ b/tensorflow/core/profiler/README.md @@ -35,9 +35,8 @@ bazel-bin/tensorflow/core/profiler/profiler \ --profile_path=/tmp/train_dir/profile_xx tfprof> op -select micros,bytes,occurrence -order_by micros -# To be open sourced... 
-bazel-bin/tensorflow/python/profiler/profiler_ui \ - --profile_path=/tmp/profiles/profile_1 +# Profiler ui available at: https://github.com/tensorflow/profiler-ui +python ui.py --profile_context_path=/tmp/train_dir/profile_xx ``` ![ProfilerUI](g3doc/profiler_ui.jpg) -- GitLab From 70298632d0e4e2b5cd3b9270dd44b2d1bb24a874 Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Fri, 25 Jan 2019 10:54:35 +0800 Subject: [PATCH 0021/1185] Workaround MSVC bug that std::isnan cannot handle integral type --- .../xla/service/hlo_evaluator_typed_visitor.h | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index 648c7d0e67..9fac9bb318 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -916,9 +916,29 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return HandleShiftRightLogical(shrl); } - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> + // Special case for integral type due to MSVC's std::isnan being unable to + // handle integral type. 
+ template ::value && + std::is_intergal::value>::type* = + nullptr> + Status HandleClamp(HloInstruction* clamp) { + std::function + clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) { + return static_cast( + std::min(high, std::max(value, low))); + }; + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[clamp], + ElementwiseTernaryOp(clamp, + std::move(ConvertTernaryFunction(clamp_op)))); + return Status::OK(); + } + + template ::value && + !std::is_intergal::value>::type* = + nullptr> Status HandleClamp(HloInstruction* clamp) { std::function clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) { -- GitLab From 2b47090304d9eec526b7de8fa3e6ebc6e4d7cdcb Mon Sep 17 00:00:00 2001 From: Pariksheet Pinjari Date: Fri, 25 Jan 2019 15:23:51 +0530 Subject: [PATCH 0022/1185] Keras added missing test case in image_test Missing test cases added --- tensorflow/python/keras/preprocessing/image_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/keras/preprocessing/image_test.py b/tensorflow/python/keras/preprocessing/image_test.py index 4abaadfcd3..f7cbb589dc 100644 --- a/tensorflow/python/keras/preprocessing/image_test.py +++ b/tensorflow/python/keras/preprocessing/image_test.py @@ -386,6 +386,8 @@ class TestImage(test.TestCase): _ = keras.preprocessing.image.random_shift(x, 0.2, 0.2) _ = keras.preprocessing.image.random_shear(x, 2.) _ = keras.preprocessing.image.random_zoom(x, (0.5, 0.5)) + _ = keras.preprocessing.image.apply_channel_shift(x, 2, 2) + _ = keras.preprocessing.image.apply_affine_transform(x, 2) with self.assertRaises(ValueError): keras.preprocessing.image.random_zoom(x, (0, 0, 0)) _ = keras.preprocessing.image.random_channel_shift(x, 2.) 
-- GitLab From 4e06198e15ca222ac2d32a24ed4b7133f720118e Mon Sep 17 00:00:00 2001 From: nammbash Date: Fri, 18 Jan 2019 16:53:38 -0800 Subject: [PATCH 0023/1185] requantization_op_perchannel_support --- tensorflow/core/BUILD | 1 + ...pi_def_RequantizationRangePerChannel.pbtxt | 4 + .../api_def_RequantizePerChannel.pbtxt | 4 + tensorflow/core/kernels/BUILD | 65 +++- .../core/kernels/mkl_quantized_conv_ops.h | 37 ++- ...mkl_requantization_range_per_channel_op.cc | 110 +++++++ .../core/kernels/mkl_requantize_ops_test.cc | 297 ++++++++++++++++++ .../kernels/mkl_requantize_per_channel_op.cc | 171 ++++++++++ tensorflow/core/kernels/requantize.cc | 8 +- tensorflow/core/ops/math_ops.cc | 47 ++- 10 files changed, 735 insertions(+), 9 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt create mode 100644 tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc create mode 100644 tensorflow/core/kernels/mkl_requantize_ops_test.cc create mode 100644 tensorflow/core/kernels/mkl_requantize_per_channel_op.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c3d31f91b3..a99b03454d 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1450,6 +1450,7 @@ cc_library( "//tensorflow/core/kernels:mkl_identity_op", "//tensorflow/core/kernels:mkl_input_conversion_op", "//tensorflow/core/kernels:mkl_lrn_op", + "//tensorflow/core/kernels:mkl_requantize_ops", "//tensorflow/core/kernels:mkl_pooling_ops", "//tensorflow/core/kernels:mkl_relu_op", "//tensorflow/core/kernels:mkl_reshape_op", diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt new file mode 100644 index 0000000000..8874a53ec1 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt @@ -0,0 +1,4 @@ 
+op { +graph_op_name: + "RequantizationRangePerChannel" visibility : HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt new file mode 100644 index 0000000000..0e4cd40a36 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt @@ -0,0 +1,4 @@ +op { +graph_op_name: + "RequantizePerChannel" visibility : HIDDEN +} diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index a59ea5b631..26b8c691e1 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6104,7 +6104,9 @@ tf_kernel_library( tf_cc_test( name = "requantization_range_op_test", size = "small", - srcs = ["requantization_range_op_test.cc"], + srcs = [ + "requantization_range_op_test.cc", + ], deps = [ ":ops_testutil", ":ops_util", @@ -6981,6 +6983,67 @@ tf_mkl_kernel_library( deps = NN_DEPS + mkl_deps() + [":cwise_op"], ) +tf_mkl_kernel_library( + name = "mkl_requantize_ops", + srcs = [ + "mkl_requantization_range_per_channel_op.cc", + "mkl_requantize_per_channel_op.cc", + ], + hdrs = [ + "meta_support.h", + "no_op.h", + "reference_gemm.h", + ], + deps = if_mkl( + [ + ":concat_lib_hdrs", + ":conv_ops", + ":cwise_op", + ":eigen_helpers", + ":image_resizer_state", + ":ops_util", + ":pooling_ops", + ":quantization_utils", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + "//third_party/eigen3", + "@gemmlowp", + ":transpose_functor", + "@mkl_dnn", + "//third_party/mkl:intel_binary_blob", + ], + ), +) + +tf_cc_test_mkl( + name = "mkl_requantize_ops_test", + size = "small", + srcs = ["mkl_requantize_ops_test.cc"], + deps = [ + ":mkl_requantize_ops", + ":ops_testutil", + ":ops_util", + ":quantization_utils", + ":quantized_ops", + "//tensorflow/cc:cc_ops", + 
"//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cc_test_mkl( name = "mkl_fused_ops_test", size = "small", diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h index 10825f6962..84a1ccb4c2 100644 --- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h @@ -16,16 +16,18 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_MKL_QUANTIZED_CONV_OPS_H_ #define TENSORFLOW_CORE_KERNELS_MKL_QUANTIZED_CONV_OPS_H_ -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #ifdef INTEL_MKL namespace tensorflow { template float MklFloatForOneQuantizedLevel(float range_min, float range_max) { - const int64 highest = static_cast(Eigen::NumTraits::highest()); - const int64 lowest = static_cast(Eigen::NumTraits::lowest()); + int64 highest = static_cast(Eigen::NumTraits::highest()); + int64 lowest = static_cast(Eigen::NumTraits::lowest()); + if (lowest < -highest) lowest += 1; + const float float_for_one_quantized_level = (range_max - range_min) / (highest - lowest); return float_for_one_quantized_level; @@ -48,6 +50,35 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, *min_c = c_float_for_one_quant_level * c_lowest; *max_c = c_float_for_one_quant_level * c_highest; } + +template +void MklQuantizationRangeForMultiplication(float min_a, float max_a, + const Tensor& min_b_vector, + const Tensor& max_b_vector, + Tensor** min_c_vector, + Tensor** max_c_vector) { + CHECK(min_b_vector.NumElements() == 
(*min_c_vector)->NumElements()); + CHECK(max_b_vector.NumElements() == (*max_c_vector)->NumElements()); + size_t n_channel = min_b_vector.NumElements(); + const int64 c_highest = static_cast(Eigen::NumTraits::highest()); + const int64 c_lowest = static_cast(Eigen::NumTraits::lowest()); + const float* min_b = min_b_vector.flat().data(); + const float* max_b = max_b_vector.flat().data(); + float* min_c = (*min_c_vector)->flat().data(); + float* max_c = (*max_c_vector)->flat().data(); +#pragma omp parallel for + for (size_t n = 0; n < n_channel; n++) { + float a_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_a, max_a); + float b_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_b[n], max_b[n]); + float c_float_for_one_quant_level = + a_float_for_one_quant_level * b_float_for_one_quant_level; + min_c[n] = c_float_for_one_quant_level * c_lowest; + max_c[n] = c_float_for_one_quant_level * c_highest; + } +} + } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc new file mode 100644 index 0000000000..d23a2d320a --- /dev/null +++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc @@ -0,0 +1,110 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// See docs in ../ops/array_ops.cc. +#ifdef INTEL_MKL +#define EIGEN_USE_THREADS + +#include +#include + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/no_op.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +class MklRequantizationRangePerChannelOp : public OpKernel { + public: + explicit MklRequantizationRangePerChannelOp(OpKernelConstruction* ctx) + : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("clip_value_max", &clip_value_max_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& input = ctx->input(kInputTensorIndex); + const Tensor& input_min = ctx->input(kInputMin); + const Tensor& input_max = ctx->input(kInputMax); + + size_t depth = input_max.NumElements(); + OP_REQUIRES(ctx, input_min.dim_size(0) == depth, + errors::InvalidArgument("min has incorrect size, expected ", + depth, " was ", input_min.dim_size(0))); + OP_REQUIRES(ctx, input_max.dim_size(0) == depth, + errors::InvalidArgument("max has incorrect size, expected ", + depth, " was ", input_max.dim_size(0))); + + const float* input_min_data = input_min.flat().data(); + const float* input_max_data = input_max.flat().data(); + std::vector ranges(depth); + bool is_non_negative = true; + Eigen::array shuffling({1, 0}); + auto input_matrix = input.flat_inner_dims(); + auto transposed_input = input_matrix.shuffle(shuffling); + +#pragma omp parallel for + for (size_t i = 0; i < depth; i++) { + Eigen::Tensor min = + transposed_input.chip<0>(i).minimum(); + Eigen::Tensor max = + 
transposed_input.chip<0>(i).maximum(); + int32_t min_per_channel = min(); + int32_t max_per_channel = max(); + int32_t abs_max = + std::max(std::abs(min_per_channel), std::abs(max_per_channel)); + float scale = + std::max(std::abs(input_min_data[i]), std::abs(input_max_data[i])); + ranges[i] = (scale * (float)abs_max / (float)(1L << 31)); + if (min_per_channel < 0) is_non_negative = false; + } + + float out_min_max = std::numeric_limits::min(); + for (size_t i = 0; i < depth; i++) { + if (out_min_max < ranges[i]) out_min_max = ranges[i]; + } + // Fixing max to clip_value_max_ (example 6.0 to support relu6) + if (out_min_max > clip_value_max_) out_min_max = clip_value_max_; + + Tensor* output_min = nullptr; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMin, {}, &output_min)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMax, {}, &output_max)); + output_min->flat()(0) = is_non_negative ? 0.0f : out_min_max * -1.0f; + output_max->flat()(0) = out_min_max; + } + + private: + float clip_value_max_ = std::numeric_limits::infinity(); + const int kInputTensorIndex = 0; + const int kInputMin = 1; + const int kInputMax = 2; + const int kOutputMin = 0; + const int kOutputMax = 1; +}; + +REGISTER_KERNEL_BUILDER(Name("RequantizationRangePerChannel") + .Device(DEVICE_CPU) + .TypeConstraint("T"), + MklRequantizationRangePerChannelOp); +} // namespace tensorflow +#endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl_requantize_ops_test.cc b/tensorflow/core/kernels/mkl_requantize_ops_test.cc new file mode 100644 index 0000000000..44ab936471 --- /dev/null +++ b/tensorflow/core/kernels/mkl_requantize_ops_test.cc @@ -0,0 +1,297 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +#include + +namespace tensorflow { + +class MklRequantizatedOpsTest : public OpsTestBase {}; + +class MklRequantizatedOpsTestHelper : public OpsTestBase { + public: + void Setup(Tensor &input_tensor_qint32, float &range_weights_ch1, + float &range_weights_ch2); + void TestBody(){}; +}; + +void MklRequantizatedOpsTestHelper::Setup(Tensor &input_tensor_qint32, + float &range_weights_ch1, + float &range_weights_ch2) { + // Step 1: Assumption of inputs + // ---------------------------- + // Assume input Tensor T (NHWC) in FP32 has range [0, 5.0] size nt*ht*wt*ct + // Assume input Filter W (NHWC) with 2 output channels of size nw*ht**wt*2 + // logically, Filter W has 2 channels W1 and W2 each of size nw*ht**wt*1 + // Assume input Filter W1(NHWC) in FP32 has range [-2.0, 2.0]size nw*ht**wt*1 + // Assume input Filter W2(NHWC) in FP32 has range 
[-3.0, 3.0]size nw*ht**wt*1 + + // Step 2: Assumption of Quantizing inputs and weights (per channel) + // ------------------------------------------------------------------ + // When these 2 Tensors, T and W are quantized using a Quantize Op. + // When the input Tensor T (NHWC) is quantized to unsigned int8. + // While the input Filter W (NHWC) is qunatized to signed int8. + // hence T max value is mapped to ((2^8-1) = 255) while W to ((2^7)-1 = 127)) + + // Range of Quantized T in int8[0 , 255] maps to orig T in FP32[0 , 5.0] + // Range of Quantized W1 in int8[-127, 127] maps to orig W1 in FP32[-2.0, 2.0] + // Range of Quantized W2 in int8[-127, 127] maps to orig W2 in FP32[-3.0, 3.0] + + // Hence the resolution of Quantized T will be 5.0/255 + // Hence the resolution of Quantized W1 will be 2.0/127 + // Hence the resolution of Quantized W2 will be 3.0/127 + + // Step 3: Assumption of quantizedconv on quantized input&weights(per channel) + // --------------------------------------------------------------------------- + // The input T and weights W1 (or W2) will be convolved (and multipled) + // The output Tensor T is in int32 whose range is [-2^31, 2^31] + // The Range of the Convolved T*W1 is 2^31 * 5.0/255 * 2.0/127 = 663110.59 + // So Range of Convolved T*W1 in int32[-2^31, 22^31] that maps to + // orig T Range in FP32[0,5.0] * [-2.0, 2.0] is [-663110.59, 663110.59] + + // The Range of the Convolved T*W2 is 2^31 * 5.0/255 * 3.0/127 = 994665.88 + // So Range of Convolved T*W2 in int32[-2^31, 22^31] that maps to + // orig T Range in FP32[0,5.0] * [-3.0, 3.0] is [-994665.88, 994665.88] + + // Step 4: Assumption output above is fed to Requantization_range_perchannel + // -------------------------------------------------------------------------- + // Here we recalculate the new Range for Convolved T*W so that we + // make good use in int8 qunatization from int32 to int8. 
+ + // We assume the above operations are performed and use these values above + // as ranges for Requantization_range_perchannel_op. + range_weights_ch1 = 663110.59; // For W1 channel + range_weights_ch2 = 994665.88; // For W2 Channel + + // We Fill the inputs Tensor T qint32 with arbitrary int32 values + test::FillValues( + &input_tensor_qint32, + {-1000, -2000, 2000, 4000, -3000, -6000, 4000, 8000, + 5000, 10000, -6000, -12000, 7000, 14000, 8000, 16000, + 9000, -18000, -10000, -20000, 11000, 22000, -12000, -24000, + 13000, 26000, 14000, 28000, -15000, -30000, 16000, 32000}); + + // Step 5: Define and run requantization_range_perchannel + // ------------------------------------------------------- + // See test RequantizationRangePerChannelTest_Basic and/or + // test RequantizationRangePerChannelTest_ClipMax +} + +// Following tests the RequantizationRangePerChannel Op wherein the range +// of the weights is calculated per channel. +TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_Basic) { + // Let us setup the tensor and inputs before we run this op. + float clip_max_value = pow(2, 31); + float range_weights_ch1 = 0.0; + float range_weights_ch2 = 0.0; + + // Create the input tensor + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + + // define and input tensor T shape. + Tensor input_tensor_qint32(DT_QINT32, + {1, input_height, input_width, input_channels}); + + // Explanation and setup prior to this Op. Fill T and populate range values. + MklRequantizatedOpsTestHelper helper; + helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2); + + // Step 5: Define and run requantization_range_perchannel + // ------------------------------------------------------- + // Define, Create and initalize the OP in question. 
+ TF_ASSERT_OK(NodeDefBuilder("requantization_range_per_channel", + "RequantizationRangePerChannel") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum::v()) + .Attr("clip_value_max", clip_max_value) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // Add the Input Nodes to the Op. + AddInputFromArray(input_tensor_qint32.shape(), + input_tensor_qint32.flat()); + + // Calculate the Min and max from the ranges + float ch1_min = -1.0 * range_weights_ch1; + float ch1_max = range_weights_ch1; + float ch2_min = -1.0 * range_weights_ch2; + float ch2_max = range_weights_ch2; + + // Add the Perchannel range Nodes to the Op. + AddInputFromArray(TensorShape({input_channels}), {ch1_min, ch2_min}); + AddInputFromArray(TensorShape({input_channels}), {ch1_max, ch2_max}); + + // Run the Kernel + TF_ASSERT_OK(RunOpKernel()); + + // Step 6: Verify Output and Store values to test Requantize_perchannel + // -------------------------------------------------------------------- + + // Verify the Expected Outputs + const float output_min = GetOutput(0)->flat()(0); + const float output_max = GetOutput(1)->flat()(0); + EXPECT_NEAR(-14.8217, output_min, 0.002); + EXPECT_NEAR(14.8217, output_max, 0.002); + + // output range is made use in RequantizePerChannelTest_Basic +} + +TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_ClipMax) { + // Let us setup the tensor and inputs before we run this op. + float clip_max_value = 6; // Can be used as 6 for Relu 6 activations. + float range_weights_ch1 = 0.0; + float range_weights_ch2 = 0.0; + + // Create the input tensor + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + + // define and input tensor T shape. + Tensor input_tensor_qint32(DT_QINT32, + {1, input_height, input_width, input_channels}); + + // Explanation and setup prior to this Op. Fill T and populate range values. 
+ MklRequantizatedOpsTestHelper helper; + helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2); + + // Step 5: Define and run requantization_range_perchannel + // ------------------------------------------------------- + // Define, Create and initalize the OP in question. + TF_ASSERT_OK(NodeDefBuilder("requantization_range_per_channel", + "RequantizationRangePerChannel") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum::v()) + .Attr("clip_value_max", clip_max_value) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // Add the Input Nodes to the Op. + AddInputFromArray(input_tensor_qint32.shape(), + input_tensor_qint32.flat()); + + // Calculate the Min and max from the ranges + float ch1_min = -1.0 * range_weights_ch1; + float ch1_max = range_weights_ch1; + float ch2_min = -1.0 * range_weights_ch2; + float ch2_max = range_weights_ch2; + + // Add the Perchannel range Nodes to the Op. + AddInputFromArray(TensorShape({input_channels}), {ch1_min, ch2_min}); + AddInputFromArray(TensorShape({input_channels}), {ch1_max, ch2_max}); + + // Run the Kernel + TF_ASSERT_OK(RunOpKernel()); + + // Step 6: Verify Output and Store values to test Requantize_perchannel + // -------------------------------------------------------------------- + + // Verify the Expected Outputs + const float output_min = GetOutput(0)->flat()(0); + const float output_max = GetOutput(1)->flat()(0); + EXPECT_NEAR(-6.0, output_min, 0.002); // Values are Max as with clip_value + EXPECT_NEAR(6.0, output_max, 0.002); // Values are Max as with clip_value +} + +TEST_F(MklRequantizatedOpsTest, RequantizePerChannelTest_Basic) { + // Let us setup the tensor and inputs before we run this op. 
+ float clip_max_value = pow(2, 31); + float range_weights_ch1 = 0.0; + float range_weights_ch2 = 0.0; + + // Create the input tensor + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + + // define an input tensor T shape. + Tensor input_tensor_qint32(DT_QINT32, + {1, input_height, input_width, input_channels}); + + // Explanation and setup prior to this Op. Fill T and populate range values. + MklRequantizatedOpsTestHelper helper; + helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2); + + // Step 7: Define and run requantize_perchannel + // -------------------------------------------- + // The Output of Requantization_range_op_per_channel which calculated the + // new ranges of int8 is fed to the requantize per channel op. + // Here the Values of Convolved T*W is converted from int32 to int8. + + TF_ASSERT_OK(NodeDefBuilder("requantize_per_channel", "RequantizePerChannel") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum::v()) + .Attr("out_type", DataTypeToEnum::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // Add the Input Nodes to the Op. + AddInputFromArray(input_tensor_qint32.shape(), + input_tensor_qint32.flat()); + + // Calculate the Min and max from the ranges + float ch1_min = -1.0 * range_weights_ch1; + float ch1_max = range_weights_ch1; + float ch2_min = -1.0 * range_weights_ch2; + float ch2_max = range_weights_ch2; + + // Add the Perchannel range Nodes to the Op. + AddInputFromArray(TensorShape({input_channels}), {ch1_min, ch2_min}); + AddInputFromArray(TensorShape({input_channels}), {ch1_max, ch2_max}); + + // Calculate the Min and max from Step 6 above + // in RequantizationRangePerChannelTest_Basic + float range_op_output_min = -14.8217; + float range_op_output_max = 14.8217; + + // Add the Requested_min and requested_max stored from Step 6. 
+ AddInputFromArray(TensorShape({1}), {range_op_output_min}); + AddInputFromArray(TensorShape({1}), {range_op_output_max}); + + // Run the kernel + TF_ASSERT_OK(RunOpKernel()); + + // Verify the output with the expected output + Tensor output = *GetOutput(0); + const float output_min = GetOutput(1)->flat()(0); + const float output_max = GetOutput(2)->flat()(0); + EXPECT_NEAR(range_op_output_min, output_min, 0.002); + EXPECT_NEAR(range_op_output_max, output_max, 0.002); +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc new file mode 100644 index 0000000000..f9aa550866 --- /dev/null +++ b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc @@ -0,0 +1,171 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/array_ops.cc. 
+ +#define EIGEN_USE_THREADS +#ifdef INTEL_MKL +#include + +#include "mkldnn.hpp" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/no_op.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template +class MklRequantizePerChannelOp : public OpKernel { + public: + explicit MklRequantizePerChannelOp(OpKernelConstruction* ctx) + : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_type_)); + OP_REQUIRES(ctx, out_type_ == DT_QINT8 || out_type_ == DT_QUINT8, + errors::InvalidArgument( + "out_type must be qint8 or quint8, but got: " + out_type_)); + } + virtual ~MklRequantizePerChannelOp() {} + void Compute(OpKernelContext* ctx) override { + try { + const Tensor& input = ctx->input(kInputTensorIndex); + const Tensor& input_min_vec = ctx->input(kInputMinVec); + float* input_min_vec_data = (float*)const_cast( + static_cast(input_min_vec.flat().data())); + const Tensor& input_max_vec = ctx->input(kInputMaxVec); + float* input_max_vec_data = (float*)const_cast( + static_cast(input_max_vec.flat().data())); + + const Tensor& input_requested_min = ctx->input(this->kRequestMin); + const float input_requested_min_float = + input_requested_min.flat()(0); + const Tensor& input_requested_max = ctx->input(this->kRequestMax); + const float input_requested_max_float = + input_requested_max.flat()(0); + + size_t depth = input_min_vec.NumElements(); + OP_REQUIRES( + ctx, input_min_vec.dim_size(0) == depth, + errors::InvalidArgument("min has incorrect size, expected ", depth, + " was ", input_min_vec.dim_size(0))); + OP_REQUIRES( + ctx, input_max_vec.dim_size(0) == depth, + 
errors::InvalidArgument("max has incorrect size, expected ", depth, + " was ", input_max_vec.dim_size(0))); + + if (out_type_ == DT_QINT8) CHECK(input_requested_min_float < 0.0f); + + const float factor = (out_type_ == DT_QINT8) ? 127.0f : 255.0f; + float requested_min_max = std::max(std::abs(input_requested_min_float), + std::abs(input_requested_max_float)); + Tensor* output = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputTensorIndex, + input.shape(), &output)); + + std::vector scales(depth); + for (int i = 0; i < depth; i++) { + float min_max_from_vec = std::max(std::abs(input_min_vec_data[i]), + std::abs(input_max_vec_data[i])); + float scale = + factor * (min_max_from_vec / requested_min_max / (float)(1L << 31)); + scales[i] = scale; + } + + mkldnn::primitive_attr reorder_attr; + reorder_attr.set_output_scales(2, scales); + + memory::dims dims_mkl_order = + TFShapeToMklDnnDimsInNCHW(input.shape(), FORMAT_NHWC); + memory::desc input_md = memory::desc(dims_mkl_order, MklDnnType(), + memory::format::nhwc); + memory::desc output_md = + (out_type_ == DT_QINT8) + ? 
memory::desc(dims_mkl_order, MklDnnType(), + memory::format::nhwc) + : memory::desc(dims_mkl_order, MklDnnType(), + memory::format::nhwc); + + memory::primitive_desc input_pd = + memory::primitive_desc(input_md, cpu_engine_); + memory::primitive_desc output_pd = + memory::primitive_desc(output_md, cpu_engine_); + + void* input_buf = + static_cast(const_cast(input.flat().data())); + void* output_buf; + if (out_type_ == DT_QINT8) { + output_buf = static_cast( + const_cast(output->flat().data())); + } else { + output_buf = static_cast( + const_cast(output->flat().data())); + } + + std::unique_ptr input_mem_prim_(new memory(input_pd, input_buf)); + std::unique_ptr output_mem_prim_( + new memory(output_pd, output_buf)); + + mkldnn::reorder::primitive_desc reorder_pd = + mkldnn::reorder::primitive_desc(input_pd, output_pd, reorder_attr); + std::vector net; + net.push_back( + mkldnn::reorder(reorder_pd, *input_mem_prim_, *output_mem_prim_)); + stream(stream::kind::eager).submit(net).wait(); + + Tensor* output_min = nullptr; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMin, {}, &output_min)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMax, {}, &output_max)); + + output_min->flat()(0) = input_requested_min_float; + output_max->flat()(0) = input_requested_max_float; + + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + std::string(e.message) + ", in file " + + std::string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + ctx, errors::Aborted("Operation received an exception:", error_msg)); + } + } + + private: + const int kInputTensorIndex = 0; + const int kInputMinVec = 1; + const int kInputMaxVec = 2; + const int kRequestMin = 3; + const int kRequestMax = 4; + const int kOutputTensorIndex = 0; + const int kOutputMin = 1; + const int kOutputMax = 2; + DataType out_type_; + engine cpu_engine_ = engine(engine::cpu, 0); +}; + 
+REGISTER_KERNEL_BUILDER(Name("RequantizePerChannel") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .TypeConstraint("out_type"), + MklRequantizePerChannelOp); + +} // namespace tensorflow +#endif // INTEL_MKL diff --git a/tensorflow/core/kernels/requantize.cc b/tensorflow/core/kernels/requantize.cc index dce6f1a185..ab057d5a67 100644 --- a/tensorflow/core/kernels/requantize.cc +++ b/tensorflow/core/kernels/requantize.cc @@ -19,7 +19,6 @@ limitations under the License. #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/type_traits.h" @@ -27,6 +26,7 @@ limitations under the License. #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { @@ -100,4 +100,10 @@ REGISTER_KERNEL_BUILDER(Name("Requantize") .TypeConstraint("out_type"), RequantizeOp); +REGISTER_KERNEL_BUILDER(Name("Requantize") + .Device(DEVICE_CPU) + .TypeConstraint("Tinput") + .TypeConstraint("out_type"), + RequantizeOp); + } // namespace tensorflow diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index eb9cbd3225..52243f2fcf 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1247,12 +1247,12 @@ Status RangeSize(const Tensor* start_t, const Tensor* limit_t, T limit = limit_t->scalar()(); T delta = delta_t->scalar()(); if (start > limit && delta > 0) { - return errors::InvalidArgument( - "Requires start <= limit when delta > 0: ", start, "/", limit); + return errors::InvalidArgument("Requires start <= limit when delta > 0: ", + start, "/", limit); } if (start < limit && delta < 0) { - return errors::InvalidArgument( - "Requires start >= limit when delta < 0: ", start, "/", limit); + return 
errors::InvalidArgument("Requires start >= limit when delta < 0: ", + start, "/", limit); } if (delta == 0) { return errors::InvalidArgument("Requires delta != 0"); @@ -1686,6 +1686,45 @@ Add two input tensors element wise using mkl kernel sum. inputs: Must all be the same size and shape. )doc"); +REGISTER_OP("RequantizePerChannel") + .Input("input: T") + .Input("input_min: float") + .Input("input_max: float") + .Input("requested_output_min: float") + .Input("requested_output_max: float") + .Output("output: out_type") + .Output("output_min: float") + .Output("output_max: float") + .Attr("T: quantizedtype = DT_QINT32") + .Attr("out_type: quantizedtype = DT_QUINT8") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }); +REGISTER_OP("RequantizationRangePerChannel") + .Input("input: T") + .Input("input_min: float") + .Input("input_max: float") + .Output("output_min: float") + .Output("output_max: float") + .Attr("T: quantizedtype = DT_QINT32") + .Attr("clip_value_max: float") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); + c->set_output(0, c->Scalar()); + c->set_output(1, c->Scalar()); + return Status::OK(); + }); + #endif // INTEL_MKL REGISTER_OP("NextAfter") -- GitLab From 96007e5022df35766b014a30d6fe4075cc1662cc Mon Sep 17 00:00:00 2001 From: nammbash Date: Fri, 18 Jan 2019 16:53:38 -0800 Subject: [PATCH 0024/1185] requantization_op_perchannel_support --- tensorflow/core/BUILD | 1 + ...pi_def_RequantizationRangePerChannel.pbtxt | 4 + 
.../api_def_RequantizePerChannel.pbtxt | 4 + tensorflow/core/kernels/BUILD | 65 +++- .../core/kernels/mkl_quantized_conv_ops.h | 37 ++- ...mkl_requantization_range_per_channel_op.cc | 110 +++++++ .../core/kernels/mkl_requantize_ops_test.cc | 297 ++++++++++++++++++ .../kernels/mkl_requantize_per_channel_op.cc | 171 ++++++++++ tensorflow/core/kernels/requantize.cc | 8 +- tensorflow/core/ops/math_ops.cc | 47 ++- 10 files changed, 735 insertions(+), 9 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt create mode 100644 tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc create mode 100644 tensorflow/core/kernels/mkl_requantize_ops_test.cc create mode 100644 tensorflow/core/kernels/mkl_requantize_per_channel_op.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c3d31f91b3..a99b03454d 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1450,6 +1450,7 @@ cc_library( "//tensorflow/core/kernels:mkl_identity_op", "//tensorflow/core/kernels:mkl_input_conversion_op", "//tensorflow/core/kernels:mkl_lrn_op", + "//tensorflow/core/kernels:mkl_requantize_ops", "//tensorflow/core/kernels:mkl_pooling_ops", "//tensorflow/core/kernels:mkl_relu_op", "//tensorflow/core/kernels:mkl_reshape_op", diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt new file mode 100644 index 0000000000..8874a53ec1 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt @@ -0,0 +1,4 @@ +op { +graph_op_name: + "RequantizationRangePerChannel" visibility : HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt new file mode 100644 index 0000000000..0e4cd40a36 --- 
/dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt @@ -0,0 +1,4 @@ +op { +graph_op_name: + "RequantizePerChannel" visibility : HIDDEN +} diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index a59ea5b631..26b8c691e1 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6104,7 +6104,9 @@ tf_kernel_library( tf_cc_test( name = "requantization_range_op_test", size = "small", - srcs = ["requantization_range_op_test.cc"], + srcs = [ + "requantization_range_op_test.cc", + ], deps = [ ":ops_testutil", ":ops_util", @@ -6981,6 +6983,67 @@ tf_mkl_kernel_library( deps = NN_DEPS + mkl_deps() + [":cwise_op"], ) +tf_mkl_kernel_library( + name = "mkl_requantize_ops", + srcs = [ + "mkl_requantization_range_per_channel_op.cc", + "mkl_requantize_per_channel_op.cc", + ], + hdrs = [ + "meta_support.h", + "no_op.h", + "reference_gemm.h", + ], + deps = if_mkl( + [ + ":concat_lib_hdrs", + ":conv_ops", + ":cwise_op", + ":eigen_helpers", + ":image_resizer_state", + ":ops_util", + ":pooling_ops", + ":quantization_utils", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:nn_ops_op_lib", + "//third_party/eigen3", + "@gemmlowp", + ":transpose_functor", + "@mkl_dnn", + "//third_party/mkl:intel_binary_blob", + ], + ), +) + +tf_cc_test_mkl( + name = "mkl_requantize_ops_test", + size = "small", + srcs = ["mkl_requantize_ops_test.cc"], + deps = [ + ":mkl_requantize_ops", + ":ops_testutil", + ":ops_util", + ":quantization_utils", + ":quantized_ops", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:array_ops_op_lib", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:math_ops_op_lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + 
"//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cc_test_mkl( name = "mkl_fused_ops_test", size = "small", diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h index 10825f6962..84a1ccb4c2 100644 --- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h @@ -16,16 +16,18 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_MKL_QUANTIZED_CONV_OPS_H_ #define TENSORFLOW_CORE_KERNELS_MKL_QUANTIZED_CONV_OPS_H_ -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #ifdef INTEL_MKL namespace tensorflow { template float MklFloatForOneQuantizedLevel(float range_min, float range_max) { - const int64 highest = static_cast(Eigen::NumTraits::highest()); - const int64 lowest = static_cast(Eigen::NumTraits::lowest()); + int64 highest = static_cast(Eigen::NumTraits::highest()); + int64 lowest = static_cast(Eigen::NumTraits::lowest()); + if (lowest < -highest) lowest += 1; + const float float_for_one_quantized_level = (range_max - range_min) / (highest - lowest); return float_for_one_quantized_level; @@ -48,6 +50,35 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, *min_c = c_float_for_one_quant_level * c_lowest; *max_c = c_float_for_one_quant_level * c_highest; } + +template +void MklQuantizationRangeForMultiplication(float min_a, float max_a, + const Tensor& min_b_vector, + const Tensor& max_b_vector, + Tensor** min_c_vector, + Tensor** max_c_vector) { + CHECK(min_b_vector.NumElements() == (*min_c_vector)->NumElements()); + CHECK(max_b_vector.NumElements() == (*max_c_vector)->NumElements()); + size_t n_channel = min_b_vector.NumElements(); + const int64 c_highest = static_cast(Eigen::NumTraits::highest()); + const int64 c_lowest = 
static_cast(Eigen::NumTraits::lowest()); + const float* min_b = min_b_vector.flat().data(); + const float* max_b = max_b_vector.flat().data(); + float* min_c = (*min_c_vector)->flat().data(); + float* max_c = (*max_c_vector)->flat().data(); +#pragma omp parallel for + for (size_t n = 0; n < n_channel; n++) { + float a_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_a, max_a); + float b_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_b[n], max_b[n]); + float c_float_for_one_quant_level = + a_float_for_one_quant_level * b_float_for_one_quant_level; + min_c[n] = c_float_for_one_quant_level * c_lowest; + max_c[n] = c_float_for_one_quant_level * c_highest; + } +} + } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc new file mode 100644 index 0000000000..d23a2d320a --- /dev/null +++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc @@ -0,0 +1,110 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/array_ops.cc. 
+#ifdef INTEL_MKL +#define EIGEN_USE_THREADS + +#include +#include + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/no_op.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +class MklRequantizationRangePerChannelOp : public OpKernel { + public: + explicit MklRequantizationRangePerChannelOp(OpKernelConstruction* ctx) + : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("clip_value_max", &clip_value_max_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor& input = ctx->input(kInputTensorIndex); + const Tensor& input_min = ctx->input(kInputMin); + const Tensor& input_max = ctx->input(kInputMax); + + size_t depth = input_max.NumElements(); + OP_REQUIRES(ctx, input_min.dim_size(0) == depth, + errors::InvalidArgument("min has incorrect size, expected ", + depth, " was ", input_min.dim_size(0))); + OP_REQUIRES(ctx, input_max.dim_size(0) == depth, + errors::InvalidArgument("max has incorrect size, expected ", + depth, " was ", input_max.dim_size(0))); + + const float* input_min_data = input_min.flat().data(); + const float* input_max_data = input_max.flat().data(); + std::vector ranges(depth); + bool is_non_negative = true; + Eigen::array shuffling({1, 0}); + auto input_matrix = input.flat_inner_dims(); + auto transposed_input = input_matrix.shuffle(shuffling); + +#pragma omp parallel for + for (size_t i = 0; i < depth; i++) { + Eigen::Tensor min = + transposed_input.chip<0>(i).minimum(); + Eigen::Tensor max = + transposed_input.chip<0>(i).maximum(); + int32_t min_per_channel = min(); + int32_t max_per_channel = max(); + int32_t abs_max = + 
std::max(std::abs(min_per_channel), std::abs(max_per_channel)); + float scale = + std::max(std::abs(input_min_data[i]), std::abs(input_max_data[i])); + ranges[i] = (scale * (float)abs_max / (float)(1L << 31)); + if (min_per_channel < 0) is_non_negative = false; + } + + float out_min_max = std::numeric_limits::min(); + for (size_t i = 0; i < depth; i++) { + if (out_min_max < ranges[i]) out_min_max = ranges[i]; + } + // Fixing max to clip_value_max_ (example 6.0 to support relu6) + if (out_min_max > clip_value_max_) out_min_max = clip_value_max_; + + Tensor* output_min = nullptr; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMin, {}, &output_min)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMax, {}, &output_max)); + output_min->flat()(0) = is_non_negative ? 0.0f : out_min_max * -1.0f; + output_max->flat()(0) = out_min_max; + } + + private: + float clip_value_max_ = std::numeric_limits::infinity(); + const int kInputTensorIndex = 0; + const int kInputMin = 1; + const int kInputMax = 2; + const int kOutputMin = 0; + const int kOutputMax = 1; +}; + +REGISTER_KERNEL_BUILDER(Name("RequantizationRangePerChannel") + .Device(DEVICE_CPU) + .TypeConstraint("T"), + MklRequantizationRangePerChannelOp); +} // namespace tensorflow +#endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl_requantize_ops_test.cc b/tensorflow/core/kernels/mkl_requantize_ops_test.cc new file mode 100644 index 0000000000..44ab936471 --- /dev/null +++ b/tensorflow/core/kernels/mkl_requantize_ops_test.cc @@ -0,0 +1,297 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" + +#include + +namespace tensorflow { + +class MklRequantizatedOpsTest : public OpsTestBase {}; + +class MklRequantizatedOpsTestHelper : public OpsTestBase { + public: + void Setup(Tensor &input_tensor_qint32, float &range_weights_ch1, + float &range_weights_ch2); + void TestBody(){}; +}; + +void MklRequantizatedOpsTestHelper::Setup(Tensor &input_tensor_qint32, + float &range_weights_ch1, + float &range_weights_ch2) { + // Step 1: Assumption of inputs + // ---------------------------- + // Assume input Tensor T (NHWC) in FP32 has range [0, 5.0] size nt*ht*wt*ct + // Assume input Filter W (NHWC) with 2 output channels of size nw*ht**wt*2 + // logically, Filter W has 2 channels W1 and W2 each of size nw*ht**wt*1 + // Assume input Filter W1(NHWC) in FP32 has range [-2.0, 2.0]size nw*ht**wt*1 + // Assume input Filter W2(NHWC) in FP32 has range 
[-3.0, 3.0]size nw*ht**wt*1 + + // Step 2: Assumption of Quantizing inputs and weights (per channel) + // ------------------------------------------------------------------ + // When these 2 Tensors, T and W are quantized using a Quantize Op. + // When the input Tensor T (NHWC) is quantized to unsigned int8. + // While the input Filter W (NHWC) is qunatized to signed int8. + // hence T max value is mapped to ((2^8-1) = 255) while W to ((2^7)-1 = 127)) + + // Range of Quantized T in int8[0 , 255] maps to orig T in FP32[0 , 5.0] + // Range of Quantized W1 in int8[-127, 127] maps to orig W1 in FP32[-2.0, 2.0] + // Range of Quantized W2 in int8[-127, 127] maps to orig W2 in FP32[-3.0, 3.0] + + // Hence the resolution of Quantized T will be 5.0/255 + // Hence the resolution of Quantized W1 will be 2.0/127 + // Hence the resolution of Quantized W2 will be 3.0/127 + + // Step 3: Assumption of quantizedconv on quantized input&weights(per channel) + // --------------------------------------------------------------------------- + // The input T and weights W1 (or W2) will be convolved (and multipled) + // The output Tensor T is in int32 whose range is [-2^31, 2^31] + // The Range of the Convolved T*W1 is 2^31 * 5.0/255 * 2.0/127 = 663110.59 + // So Range of Convolved T*W1 in int32[-2^31, 22^31] that maps to + // orig T Range in FP32[0,5.0] * [-2.0, 2.0] is [-663110.59, 663110.59] + + // The Range of the Convolved T*W2 is 2^31 * 5.0/255 * 3.0/127 = 994665.88 + // So Range of Convolved T*W2 in int32[-2^31, 22^31] that maps to + // orig T Range in FP32[0,5.0] * [-3.0, 3.0] is [-994665.88, 994665.88] + + // Step 4: Assumption output above is fed to Requantization_range_perchannel + // -------------------------------------------------------------------------- + // Here we recalculate the new Range for Convolved T*W so that we + // make good use in int8 qunatization from int32 to int8. 
+ + // We assume the above operations are performed and use these values above + // as ranges for Requantization_range_perchannel_op. + range_weights_ch1 = 663110.59; // For W1 channel + range_weights_ch2 = 994665.88; // For W2 Channel + + // We Fill the inputs Tensor T qint32 with arbitrary int32 values + test::FillValues( + &input_tensor_qint32, + {-1000, -2000, 2000, 4000, -3000, -6000, 4000, 8000, + 5000, 10000, -6000, -12000, 7000, 14000, 8000, 16000, + 9000, -18000, -10000, -20000, 11000, 22000, -12000, -24000, + 13000, 26000, 14000, 28000, -15000, -30000, 16000, 32000}); + + // Step 5: Define and run requantization_range_perchannel + // ------------------------------------------------------- + // See test RequantizationRangePerChannelTest_Basic and/or + // test RequantizationRangePerChannelTest_ClipMax +} + +// Following tests the RequantizationRangePerChannel Op wherein the range +// of the weights is calculated per channel. +TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_Basic) { + // Let us setup the tensor and inputs before we run this op. + float clip_max_value = pow(2, 31); + float range_weights_ch1 = 0.0; + float range_weights_ch2 = 0.0; + + // Create the input tensor + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + + // define and input tensor T shape. + Tensor input_tensor_qint32(DT_QINT32, + {1, input_height, input_width, input_channels}); + + // Explanation and setup prior to this Op. Fill T and populate range values. + MklRequantizatedOpsTestHelper helper; + helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2); + + // Step 5: Define and run requantization_range_perchannel + // ------------------------------------------------------- + // Define, Create and initalize the OP in question. 
+ TF_ASSERT_OK(NodeDefBuilder("requantization_range_per_channel", + "RequantizationRangePerChannel") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum::v()) + .Attr("clip_value_max", clip_max_value) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // Add the Input Nodes to the Op. + AddInputFromArray(input_tensor_qint32.shape(), + input_tensor_qint32.flat()); + + // Calculate the Min and max from the ranges + float ch1_min = -1.0 * range_weights_ch1; + float ch1_max = range_weights_ch1; + float ch2_min = -1.0 * range_weights_ch2; + float ch2_max = range_weights_ch2; + + // Add the Perchannel range Nodes to the Op. + AddInputFromArray(TensorShape({input_channels}), {ch1_min, ch2_min}); + AddInputFromArray(TensorShape({input_channels}), {ch1_max, ch2_max}); + + // Run the Kernel + TF_ASSERT_OK(RunOpKernel()); + + // Step 6: Verify Output and Store values to test Requantize_perchannel + // -------------------------------------------------------------------- + + // Verify the Expected Outputs + const float output_min = GetOutput(0)->flat()(0); + const float output_max = GetOutput(1)->flat()(0); + EXPECT_NEAR(-14.8217, output_min, 0.002); + EXPECT_NEAR(14.8217, output_max, 0.002); + + // output range is made use in RequantizePerChannelTest_Basic +} + +TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_ClipMax) { + // Let us setup the tensor and inputs before we run this op. + float clip_max_value = 6; // Can be used as 6 for Relu 6 activations. + float range_weights_ch1 = 0.0; + float range_weights_ch2 = 0.0; + + // Create the input tensor + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + + // define and input tensor T shape. + Tensor input_tensor_qint32(DT_QINT32, + {1, input_height, input_width, input_channels}); + + // Explanation and setup prior to this Op. Fill T and populate range values. 
+ MklRequantizatedOpsTestHelper helper; + helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2); + + // Step 5: Define and run requantization_range_perchannel + // ------------------------------------------------------- + // Define, Create and initalize the OP in question. + TF_ASSERT_OK(NodeDefBuilder("requantization_range_per_channel", + "RequantizationRangePerChannel") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum::v()) + .Attr("clip_value_max", clip_max_value) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // Add the Input Nodes to the Op. + AddInputFromArray(input_tensor_qint32.shape(), + input_tensor_qint32.flat()); + + // Calculate the Min and max from the ranges + float ch1_min = -1.0 * range_weights_ch1; + float ch1_max = range_weights_ch1; + float ch2_min = -1.0 * range_weights_ch2; + float ch2_max = range_weights_ch2; + + // Add the Perchannel range Nodes to the Op. + AddInputFromArray(TensorShape({input_channels}), {ch1_min, ch2_min}); + AddInputFromArray(TensorShape({input_channels}), {ch1_max, ch2_max}); + + // Run the Kernel + TF_ASSERT_OK(RunOpKernel()); + + // Step 6: Verify Output and Store values to test Requantize_perchannel + // -------------------------------------------------------------------- + + // Verify the Expected Outputs + const float output_min = GetOutput(0)->flat()(0); + const float output_max = GetOutput(1)->flat()(0); + EXPECT_NEAR(-6.0, output_min, 0.002); // Values are Max as with clip_value + EXPECT_NEAR(6.0, output_max, 0.002); // Values are Max as with clip_value +} + +TEST_F(MklRequantizatedOpsTest, RequantizePerChannelTest_Basic) { + // Let us setup the tensor and inputs before we run this op. 
+ float clip_max_value = pow(2, 31); + float range_weights_ch1 = 0.0; + float range_weights_ch2 = 0.0; + + // Create the input tensor + const int input_height = 4; + const int input_width = 4; + const int input_channels = 2; + + // define an input tensor T shape. + Tensor input_tensor_qint32(DT_QINT32, + {1, input_height, input_width, input_channels}); + + // Explanation and setup prior to this Op. Fill T and populate range values. + MklRequantizatedOpsTestHelper helper; + helper.Setup(input_tensor_qint32, range_weights_ch1, range_weights_ch2); + + // Step 7: Define and run requantize_perchannel + // -------------------------------------------- + // The Output of Requantization_range_op_per_channel which calculated the + // new ranges of int8 is fed to the requantize per channel op. + // Here the Values of Convolved T*W is converted from int32 to int8. + + TF_ASSERT_OK(NodeDefBuilder("requantize_per_channel", "RequantizePerChannel") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("T", DataTypeToEnum::v()) + .Attr("out_type", DataTypeToEnum::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // Add the Input Nodes to the Op. + AddInputFromArray(input_tensor_qint32.shape(), + input_tensor_qint32.flat()); + + // Calculate the Min and max from the ranges + float ch1_min = -1.0 * range_weights_ch1; + float ch1_max = range_weights_ch1; + float ch2_min = -1.0 * range_weights_ch2; + float ch2_max = range_weights_ch2; + + // Add the Perchannel range Nodes to the Op. + AddInputFromArray(TensorShape({input_channels}), {ch1_min, ch2_min}); + AddInputFromArray(TensorShape({input_channels}), {ch1_max, ch2_max}); + + // Calculate the Min and max from Step 6 above + // in RequantizationRangePerChannelTest_Basic + float range_op_output_min = -14.8217; + float range_op_output_max = 14.8217; + + // Add the Requested_min and requested_max stored from Step 6. 
+ AddInputFromArray(TensorShape({1}), {range_op_output_min}); + AddInputFromArray(TensorShape({1}), {range_op_output_max}); + + // Run the kernel + TF_ASSERT_OK(RunOpKernel()); + + // Verify the output with the expected output + Tensor output = *GetOutput(0); + const float output_min = GetOutput(1)->flat()(0); + const float output_max = GetOutput(2)->flat()(0); + EXPECT_NEAR(range_op_output_min, output_min, 0.002); + EXPECT_NEAR(range_op_output_max, output_max, 0.002); +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc new file mode 100644 index 0000000000..f9aa550866 --- /dev/null +++ b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc @@ -0,0 +1,171 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/array_ops.cc. 
+ +#define EIGEN_USE_THREADS +#ifdef INTEL_MKL +#include + +#include "mkldnn.hpp" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/no_op.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template +class MklRequantizePerChannelOp : public OpKernel { + public: + explicit MklRequantizePerChannelOp(OpKernelConstruction* ctx) + : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("out_type", &out_type_)); + OP_REQUIRES(ctx, out_type_ == DT_QINT8 || out_type_ == DT_QUINT8, + errors::InvalidArgument( + "out_type must be qint8 or quint8, but got: " + out_type_)); + } + virtual ~MklRequantizePerChannelOp() {} + void Compute(OpKernelContext* ctx) override { + try { + const Tensor& input = ctx->input(kInputTensorIndex); + const Tensor& input_min_vec = ctx->input(kInputMinVec); + float* input_min_vec_data = (float*)const_cast( + static_cast(input_min_vec.flat().data())); + const Tensor& input_max_vec = ctx->input(kInputMaxVec); + float* input_max_vec_data = (float*)const_cast( + static_cast(input_max_vec.flat().data())); + + const Tensor& input_requested_min = ctx->input(this->kRequestMin); + const float input_requested_min_float = + input_requested_min.flat()(0); + const Tensor& input_requested_max = ctx->input(this->kRequestMax); + const float input_requested_max_float = + input_requested_max.flat()(0); + + size_t depth = input_min_vec.NumElements(); + OP_REQUIRES( + ctx, input_min_vec.dim_size(0) == depth, + errors::InvalidArgument("min has incorrect size, expected ", depth, + " was ", input_min_vec.dim_size(0))); + OP_REQUIRES( + ctx, input_max_vec.dim_size(0) == depth, + 
errors::InvalidArgument("max has incorrect size, expected ", depth, + " was ", input_max_vec.dim_size(0))); + + if (out_type_ == DT_QINT8) CHECK(input_requested_min_float < 0.0f); + + const float factor = (out_type_ == DT_QINT8) ? 127.0f : 255.0f; + float requested_min_max = std::max(std::abs(input_requested_min_float), + std::abs(input_requested_max_float)); + Tensor* output = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputTensorIndex, + input.shape(), &output)); + + std::vector scales(depth); + for (int i = 0; i < depth; i++) { + float min_max_from_vec = std::max(std::abs(input_min_vec_data[i]), + std::abs(input_max_vec_data[i])); + float scale = + factor * (min_max_from_vec / requested_min_max / (float)(1L << 31)); + scales[i] = scale; + } + + mkldnn::primitive_attr reorder_attr; + reorder_attr.set_output_scales(2, scales); + + memory::dims dims_mkl_order = + TFShapeToMklDnnDimsInNCHW(input.shape(), FORMAT_NHWC); + memory::desc input_md = memory::desc(dims_mkl_order, MklDnnType(), + memory::format::nhwc); + memory::desc output_md = + (out_type_ == DT_QINT8) + ? 
memory::desc(dims_mkl_order, MklDnnType(), + memory::format::nhwc) + : memory::desc(dims_mkl_order, MklDnnType(), + memory::format::nhwc); + + memory::primitive_desc input_pd = + memory::primitive_desc(input_md, cpu_engine_); + memory::primitive_desc output_pd = + memory::primitive_desc(output_md, cpu_engine_); + + void* input_buf = + static_cast(const_cast(input.flat().data())); + void* output_buf; + if (out_type_ == DT_QINT8) { + output_buf = static_cast( + const_cast(output->flat().data())); + } else { + output_buf = static_cast( + const_cast(output->flat().data())); + } + + std::unique_ptr input_mem_prim_(new memory(input_pd, input_buf)); + std::unique_ptr output_mem_prim_( + new memory(output_pd, output_buf)); + + mkldnn::reorder::primitive_desc reorder_pd = + mkldnn::reorder::primitive_desc(input_pd, output_pd, reorder_attr); + std::vector net; + net.push_back( + mkldnn::reorder(reorder_pd, *input_mem_prim_, *output_mem_prim_)); + stream(stream::kind::eager).submit(net).wait(); + + Tensor* output_min = nullptr; + Tensor* output_max = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMin, {}, &output_min)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMax, {}, &output_max)); + + output_min->flat()(0) = input_requested_min_float; + output_max->flat()(0) = input_requested_max_float; + + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + std::string(e.message) + ", in file " + + std::string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + ctx, errors::Aborted("Operation received an exception:", error_msg)); + } + } + + private: + const int kInputTensorIndex = 0; + const int kInputMinVec = 1; + const int kInputMaxVec = 2; + const int kRequestMin = 3; + const int kRequestMax = 4; + const int kOutputTensorIndex = 0; + const int kOutputMin = 1; + const int kOutputMax = 2; + DataType out_type_; + engine cpu_engine_ = engine(engine::cpu, 0); +}; + 
+REGISTER_KERNEL_BUILDER(Name("RequantizePerChannel") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .TypeConstraint("out_type"), + MklRequantizePerChannelOp); + +} // namespace tensorflow +#endif // INTEL_MKL diff --git a/tensorflow/core/kernels/requantize.cc b/tensorflow/core/kernels/requantize.cc index dce6f1a185..ab057d5a67 100644 --- a/tensorflow/core/kernels/requantize.cc +++ b/tensorflow/core/kernels/requantize.cc @@ -19,7 +19,6 @@ limitations under the License. #include -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/type_traits.h" @@ -27,6 +26,7 @@ limitations under the License. #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { @@ -100,4 +100,10 @@ REGISTER_KERNEL_BUILDER(Name("Requantize") .TypeConstraint("out_type"), RequantizeOp); +REGISTER_KERNEL_BUILDER(Name("Requantize") + .Device(DEVICE_CPU) + .TypeConstraint("Tinput") + .TypeConstraint("out_type"), + RequantizeOp); + } // namespace tensorflow diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index eb9cbd3225..52243f2fcf 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1247,12 +1247,12 @@ Status RangeSize(const Tensor* start_t, const Tensor* limit_t, T limit = limit_t->scalar()(); T delta = delta_t->scalar()(); if (start > limit && delta > 0) { - return errors::InvalidArgument( - "Requires start <= limit when delta > 0: ", start, "/", limit); + return errors::InvalidArgument("Requires start <= limit when delta > 0: ", + start, "/", limit); } if (start < limit && delta < 0) { - return errors::InvalidArgument( - "Requires start >= limit when delta < 0: ", start, "/", limit); + return 
errors::InvalidArgument("Requires start >= limit when delta < 0: ", + start, "/", limit); } if (delta == 0) { return errors::InvalidArgument("Requires delta != 0"); @@ -1686,6 +1686,45 @@ Add two input tensors element wise using mkl kernel sum. inputs: Must all be the same size and shape. )doc"); +REGISTER_OP("RequantizePerChannel") + .Input("input: T") + .Input("input_min: float") + .Input("input_max: float") + .Input("requested_output_min: float") + .Input("requested_output_max: float") + .Output("output: out_type") + .Output("output_min: float") + .Output("output_max: float") + .Attr("T: quantizedtype = DT_QINT32") + .Attr("out_type: quantizedtype = DT_QUINT8") + .SetShapeFn([](InferenceContext* c) { + TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c)); + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); + c->set_output(1, c->Scalar()); + c->set_output(2, c->Scalar()); + return Status::OK(); + }); +REGISTER_OP("RequantizationRangePerChannel") + .Input("input: T") + .Input("input_min: float") + .Input("input_max: float") + .Output("output_min: float") + .Output("output_max: float") + .Attr("T: quantizedtype = DT_QINT32") + .Attr("clip_value_max: float") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); + c->set_output(0, c->Scalar()); + c->set_output(1, c->Scalar()); + return Status::OK(); + }); + #endif // INTEL_MKL REGISTER_OP("NextAfter") -- GitLab From e3730df4a767be6ab8ebfeca6228eae8168bd342 Mon Sep 17 00:00:00 2001 From: nammbash Date: Fri, 25 Jan 2019 16:42:05 -0800 Subject: [PATCH 0025/1185] fix api_test to succeed in Eigen along with config=mkl option --- tensorflow/core/ops/math_ops.cc | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 52243f2fcf..b6d5f353b3 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1686,6 +1686,8 @@ Add two input tensors element wise using mkl kernel sum. inputs: Must all be the same size and shape. )doc"); +#endif // INTEL_MKL + REGISTER_OP("RequantizePerChannel") .Input("input: T") .Input("input_min: float") @@ -1725,8 +1727,6 @@ REGISTER_OP("RequantizationRangePerChannel") return Status::OK(); }); -#endif // INTEL_MKL - REGISTER_OP("NextAfter") .Attr("T: {float64, float32} = DT_FLOAT") .Input("x1: T") -- GitLab From e7ab0ddbf811866de60b8b317887f7000586b6d8 Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Sat, 26 Jan 2019 09:35:08 +0800 Subject: [PATCH 0026/1185] Add test for int64 clamping --- .../xla/service/hlo_evaluator_test.cc | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 590f76f472..94a1ce08d2 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -152,6 +152,33 @@ TEST_P(HloEvaluatorBf16Test, DoesClamp) { EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); } +// Verifies that clamping of int64 does not cause loss of precision +TEST_P(HloEvaluatorBf16Test, DoesClamp) { + auto ones = [](int bits) -> int64 { return (1LL << bits) - 1; }; + + auto low = + LiteralUtil::CreateR2({{0, ones(54)}, {ones(54), ones(58)}}); + auto value = LiteralUtil::CreateR2({{0, ones(56)}, {0, ones(58)}}); + auto high = LiteralUtil::CreateR2( + {{ones(54), ones(55)}, {ones(56), ones(58)}}); + + Shape shape = low.shape(); + HloComputation::Builder b(TestName()); + auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(low))); + auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value))); + auto 
c3 = b.AddInstruction(HloInstruction::CreateConstant(std::move(high))); + b.AddInstruction( + HloInstruction::CreateTernary(shape, HloOpcode::kClamp, c1, c2, c3)); + m_->AddEntryComputation(b.Build()); + + Literal result = Evaluate(); + + auto expected = + LiteralUtil::CreateR2({{0, ones(55)}, {ones(54), ones(58)}}); + + EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); +} + TEST_P(HloEvaluatorBf16Test, DISABLED_DoesClampSpecialBroadcast) { auto low = LiteralUtil::CreateR0(0.f); auto value = LiteralUtil::CreateR2({{-1.f, 0.f}, {1.f, 2.f}}); -- GitLab From 16a51f86a10e0dd2ef5395f510b8ed0b696531b3 Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Sat, 26 Jan 2019 09:38:01 +0800 Subject: [PATCH 0027/1185] Fix test name --- tensorflow/compiler/xla/service/hlo_evaluator_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 94a1ce08d2..f5452c9bd3 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -153,7 +153,7 @@ TEST_P(HloEvaluatorBf16Test, DoesClamp) { } // Verifies that clamping of int64 does not cause loss of precision -TEST_P(HloEvaluatorBf16Test, DoesClamp) { +TEST_P(HloEvaluatorBf16Test, DoesClampInt64) { auto ones = [](int bits) -> int64 { return (1LL << bits) - 1; }; auto low = -- GitLab From a183c36fdf1e7bfb1585455517e6053aa61b867c Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Sat, 26 Jan 2019 09:48:33 +0800 Subject: [PATCH 0028/1185] Address nits --- tensorflow/compiler/xla/service/hlo_evaluator_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index f5452c9bd3..644dcd0ce7 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -154,7 
+154,7 @@ TEST_P(HloEvaluatorBf16Test, DoesClamp) { // Verifies that clamping of int64 does not cause loss of precision TEST_P(HloEvaluatorBf16Test, DoesClampInt64) { - auto ones = [](int bits) -> int64 { return (1LL << bits) - 1; }; + auto ones = [](int bits) { return (int64{1} << bits) - 1; }; auto low = LiteralUtil::CreateR2({{0, ones(54)}, {ones(54), ones(58)}}); -- GitLab From d52804582d9f2d1522cff1341d2b17bc83bd7685 Mon Sep 17 00:00:00 2001 From: nammbash Date: Tue, 29 Jan 2019 09:50:37 -0800 Subject: [PATCH 0029/1185] fix formatting manually for pbtxt and ignoring clang formatting --- .../base_api/api_def_RequantizationRangePerChannel.pbtxt | 4 ++-- .../core/api_def/base_api/api_def_RequantizePerChannel.pbtxt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt index 8874a53ec1..2226027a42 100644 --- a/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt @@ -1,4 +1,4 @@ op { -graph_op_name: - "RequantizationRangePerChannel" visibility : HIDDEN +graph_op_name: "RequantizationRangePerChannel" +visibility : HIDDEN } diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt index 0e4cd40a36..c7bfc0a051 100644 --- a/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_RequantizePerChannel.pbtxt @@ -1,4 +1,4 @@ op { -graph_op_name: - "RequantizePerChannel" visibility : HIDDEN +graph_op_name: "RequantizePerChannel" +visibility : HIDDEN } -- GitLab From 17198dd48951b27def39d653eafa21dac3c72939 Mon Sep 17 00:00:00 2001 From: nammbash Date: Tue, 29 Jan 2019 16:03:26 -0800 Subject: [PATCH 0030/1185] Review changes --- 
tensorflow/core/kernels/BUILD | 13 ++-- .../core/kernels/mkl_quantized_conv_ops.h | 11 ++-- ...mkl_requantization_range_per_channel_op.cc | 32 ++++----- .../core/kernels/mkl_requantize_ops_test.cc | 66 +++++++++---------- .../kernels/mkl_requantize_per_channel_op.cc | 37 +++++------ 5 files changed, 80 insertions(+), 79 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 26b8c691e1..f170142105 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6104,9 +6104,7 @@ tf_kernel_library( tf_cc_test( name = "requantization_range_op_test", size = "small", - srcs = [ - "requantization_range_op_test.cc", - ], + srcs = ["requantization_range_op_test.cc"], deps = [ ":ops_testutil", ":ops_util", @@ -6994,8 +6992,7 @@ tf_mkl_kernel_library( "no_op.h", "reference_gemm.h", ], - deps = if_mkl( - [ + deps = if_mkl([ ":concat_lib_hdrs", ":conv_ops", ":cwise_op", @@ -7004,6 +7001,7 @@ tf_mkl_kernel_library( ":ops_util", ":pooling_ops", ":quantization_utils", + ":transpose_functor", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -7011,10 +7009,9 @@ tf_mkl_kernel_library( "//tensorflow/core:math_ops_op_lib", "//tensorflow/core:nn_ops_op_lib", "//third_party/eigen3", - "@gemmlowp", - ":transpose_functor", - "@mkl_dnn", "//third_party/mkl:intel_binary_blob", + "@gemmlowp", + "@mkl_dnn", ], ), ) diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h index 84a1ccb4c2..985081486b 100644 --- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h @@ -26,7 +26,10 @@ template float MklFloatForOneQuantizedLevel(float range_min, float range_max) { int64 highest = static_cast(Eigen::NumTraits::highest()); int64 lowest = static_cast(Eigen::NumTraits::lowest()); - if (lowest < -highest) lowest += 1; + + // Adjusting for having a symmetric range. 
+ // for example: for 8-bit [-127, 127] as opposed to [-128, 127]. + if (lowest < -highest) ++lowest; const float float_for_one_quantized_level = (range_max - range_min) / (highest - lowest); @@ -57,8 +60,8 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, const Tensor& max_b_vector, Tensor** min_c_vector, Tensor** max_c_vector) { - CHECK(min_b_vector.NumElements() == (*min_c_vector)->NumElements()); - CHECK(max_b_vector.NumElements() == (*max_c_vector)->NumElements()); + DCHECK(min_b_vector.NumElements() == (*min_c_vector)->NumElements()); + DCHECK(max_b_vector.NumElements() == (*max_c_vector)->NumElements()); size_t n_channel = min_b_vector.NumElements(); const int64 c_highest = static_cast(Eigen::NumTraits::highest()); const int64 c_lowest = static_cast(Eigen::NumTraits::lowest()); @@ -67,7 +70,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, float* min_c = (*min_c_vector)->flat().data(); float* max_c = (*max_c_vector)->flat().data(); #pragma omp parallel for - for (size_t n = 0; n < n_channel; n++) { + for (size_t n = 0; n < n_channel; ++n) { float a_float_for_one_quant_level = MklFloatForOneQuantizedLevel(min_a, max_a); float b_float_for_one_quant_level = diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc index d23a2d320a..78ef2fc4a5 100644 --- a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc +++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -43,15 +43,15 @@ class MklRequantizationRangePerChannelOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(kInputTensorIndex); - const Tensor& input_min = ctx->input(kInputMin); - const Tensor& input_max = ctx->input(kInputMax); + const Tensor& input_min = ctx->input(kInputMinIndex); + const Tensor& input_max = ctx->input(kInputMaxIndex); size_t depth = input_max.NumElements(); OP_REQUIRES(ctx, input_min.dim_size(0) == depth, - errors::InvalidArgument("min has incorrect size, expected ", + errors::InvalidArgument("input_min has incorrect size, expected ", depth, " was ", input_min.dim_size(0))); OP_REQUIRES(ctx, input_max.dim_size(0) == depth, - errors::InvalidArgument("max has incorrect size, expected ", + errors::InvalidArgument("input_max has incorrect size, expected ", depth, " was ", input_max.dim_size(0))); const float* input_min_data = input_min.flat().data(); @@ -62,8 +62,9 @@ class MklRequantizationRangePerChannelOp : public OpKernel { auto input_matrix = input.flat_inner_dims(); auto transposed_input = input_matrix.shuffle(shuffling); + // Find the ranges of each channel in parallel. #pragma omp parallel for - for (size_t i = 0; i < depth; i++) { + for (size_t i = 0; i < depth; ++i) { Eigen::Tensor min = transposed_input.chip<0>(i).minimum(); Eigen::Tensor max = @@ -74,12 +75,13 @@ class MklRequantizationRangePerChannelOp : public OpKernel { std::max(std::abs(min_per_channel), std::abs(max_per_channel)); float scale = std::max(std::abs(input_min_data[i]), std::abs(input_max_data[i])); - ranges[i] = (scale * (float)abs_max / (float)(1L << 31)); + ranges[i] = scale * static_cast(abs_max) / static_cast(1L << 31); if (min_per_channel < 0) is_non_negative = false; } + // Obtain ranges of out_min_max after all parallel_for openMP threads have joined. 
float out_min_max = std::numeric_limits::min(); - for (size_t i = 0; i < depth; i++) { + for (size_t i = 0; i < depth; ++i) { if (out_min_max < ranges[i]) out_min_max = ranges[i]; } // Fixing max to clip_value_max_ (example 6.0 to support relu6) @@ -87,19 +89,19 @@ class MklRequantizationRangePerChannelOp : public OpKernel { Tensor* output_min = nullptr; Tensor* output_max = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMin, {}, &output_min)); - OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMax, {}, &output_max)); - output_min->flat()(0) = is_non_negative ? 0.0f : out_min_max * -1.0f; + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMinIndex, {}, &output_min)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMaxIndex, {}, &output_max)); + output_min->flat()(0) = is_non_negative ? 0.0f : -out_min_max; output_max->flat()(0) = out_min_max; } private: float clip_value_max_ = std::numeric_limits::infinity(); const int kInputTensorIndex = 0; - const int kInputMin = 1; - const int kInputMax = 2; - const int kOutputMin = 0; - const int kOutputMax = 1; + const int kInputMinIndex = 1; + const int kInputMaxIndexIndex = 2; + const int kOutputMinIndex = 0; + const int kOutputMaxIndex = 1; }; REGISTER_KERNEL_BUILDER(Name("RequantizationRangePerChannel") diff --git a/tensorflow/core/kernels/mkl_requantize_ops_test.cc b/tensorflow/core/kernels/mkl_requantize_ops_test.cc index 44ab936471..ebc7687271 100644 --- a/tensorflow/core/kernels/mkl_requantize_ops_test.cc +++ b/tensorflow/core/kernels/mkl_requantize_ops_test.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/fake_input.h" #include "tensorflow/core/framework/node_def_builder.h" @@ -27,8 +29,6 @@ limitations under the License. 
#include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" -#include - namespace tensorflow { class MklRequantizatedOpsTest : public OpsTestBase {}; @@ -43,24 +43,24 @@ class MklRequantizatedOpsTestHelper : public OpsTestBase { void MklRequantizatedOpsTestHelper::Setup(Tensor &input_tensor_qint32, float &range_weights_ch1, float &range_weights_ch2) { - // Step 1: Assumption of inputs - // ---------------------------- + // Step 1: Input range assumptions + // ------------------------------- // Assume input Tensor T (NHWC) in FP32 has range [0, 5.0] size nt*ht*wt*ct - // Assume input Filter W (NHWC) with 2 output channels of size nw*ht**wt*2 - // logically, Filter W has 2 channels W1 and W2 each of size nw*ht**wt*1 - // Assume input Filter W1(NHWC) in FP32 has range [-2.0, 2.0]size nw*ht**wt*1 - // Assume input Filter W2(NHWC) in FP32 has range [-3.0, 3.0]size nw*ht**wt*1 - - // Step 2: Assumption of Quantizing inputs and weights (per channel) - // ------------------------------------------------------------------ - // When these 2 Tensors, T and W are quantized using a Quantize Op. - // When the input Tensor T (NHWC) is quantized to unsigned int8. - // While the input Filter W (NHWC) is qunatized to signed int8. 
- // hence T max value is mapped to ((2^8-1) = 255) while W to ((2^7)-1 = 127)) - - // Range of Quantized T in int8[0 , 255] maps to orig T in FP32[0 , 5.0] - // Range of Quantized W1 in int8[-127, 127] maps to orig W1 in FP32[-2.0, 2.0] - // Range of Quantized W2 in int8[-127, 127] maps to orig W2 in FP32[-3.0, 3.0] + // Assume input Filter W (NHWC) with 2 output channels of size nw*ht*wt*2 + // logically, Filter W has 2 channels W1 and W2 each of size nw*ht*wt*1 + // Assume input Filter W1(NHWC) in FP32 has range [-2.0, 2.0]size nw*ht*wt*1 + // Assume input Filter W2(NHWC) in FP32 has range [-3.0, 3.0]size nw*ht*wt*1 + + // Step 2: Quantization details (per channel) + // ------------------------------------------ + // T and W are quantized using a Quantize Op. + // The input Tensor T (NHWC) is quantized to unsigned int8. + // The input Filter W (NHWC) is quantized to signed int8. + // Hence T's max value is mapped to ((2^8-1) = 255), while W's to ((2^7)-1 = 127)). + + // Range of Quantized T in uint8[0 , 255] maps to orig T in FP32[0 , 5.0] + // Range of Quantized W1 in int8[-127, 127] maps to orig W1 in FP32[-2.0, 2.0] + // Range of Quantized W2 in int8[-127, 127] maps to orig W2 in FP32[-3.0, 3.0] // Hence the resolution of Quantized T will be 5.0/255 // Hence the resolution of Quantized W1 will be 2.0/127 @@ -68,20 +68,20 @@ void MklRequantizatedOpsTestHelper::Setup(Tensor &input_tensor_qint32, // Step 3: Assumption of quantizedconv on quantized input&weights(per channel) // --------------------------------------------------------------------------- - // The input T and weights W1 (or W2) will be convolved (and multipled) - // The output Tensor T is in int32 whose range is [-2^31, 2^31] - // The Range of the Convolved T*W1 is 2^31 * 5.0/255 * 2.0/127 = 663110.59 - // So Range of Convolved T*W1 in int32[-2^31, 22^31] that maps to + // The input T and weights W1 (or W2) will be convolved. 
+ // The output Tensor T is in int32 whose range is [-(2^31-1), 2^31-1) + // The Range of the convolved T*W1 is ((2^31)-1) * 5.0/255 * 2.0/127 = 663110.59 + // So the range of convolved T*W1 in int32[-(2^31-1), 22^31-1] that maps to // orig T Range in FP32[0,5.0] * [-2.0, 2.0] is [-663110.59, 663110.59] - // The Range of the Convolved T*W2 is 2^31 * 5.0/255 * 3.0/127 = 994665.88 - // So Range of Convolved T*W2 in int32[-2^31, 22^31] that maps to - // orig T Range in FP32[0,5.0] * [-3.0, 3.0] is [-994665.88, 994665.88] + // The Range of the convolved T*W2 is 2^31-1 * 5.0/255 * 3.0/127 = 994665.88 + // So Range of convolved T*W2 in int32[-(2^31-1), 22^31-1] that maps to + // orig T Range in FP32 [0, 5.0] * [-3.0, 3.0] is [-994665.88, 994665.88] // Step 4: Assumption output above is fed to Requantization_range_perchannel // -------------------------------------------------------------------------- - // Here we recalculate the new Range for Convolved T*W so that we - // make good use in int8 qunatization from int32 to int8. + // Here we recalculate the new Range for convolved T*W so that we + // make good use in int8 quantization from int32 to int8. // We assume the above operations are performed and use these values above // as ranges for Requantization_range_perchannel_op. @@ -102,10 +102,10 @@ void MklRequantizatedOpsTestHelper::Setup(Tensor &input_tensor_qint32, // test RequantizationRangePerChannelTest_ClipMax } -// Following tests the RequantizationRangePerChannel Op wherein the range +// Tests the RequantizationRangePerChannel Op wherein the range // of the weights is calculated per channel. TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_Basic) { - // Let us setup the tensor and inputs before we run this op. + // Let us set up the tensor and inputs before we run this op. 
float clip_max_value = pow(2, 31); float range_weights_ch1 = 0.0; float range_weights_ch2 = 0.0; @@ -115,7 +115,7 @@ TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_Basic) { const int input_width = 4; const int input_channels = 2; - // define and input tensor T shape. + // Define the shape of T. Tensor input_tensor_qint32(DT_QINT32, {1, input_height, input_width, input_channels}); @@ -202,9 +202,9 @@ TEST_F(MklRequantizatedOpsTest, RequantizationRangePerChannelTest_ClipMax) { input_tensor_qint32.flat()); // Calculate the Min and max from the ranges - float ch1_min = -1.0 * range_weights_ch1; + float ch1_min = -range_weights_ch1; float ch1_max = range_weights_ch1; - float ch2_min = -1.0 * range_weights_ch2; + float ch2_min = -range_weights_ch2; float ch2_max = range_weights_ch2; // Add the Perchannel range Nodes to the Op. diff --git a/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc index f9aa550866..e531ec8931 100644 --- a/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc +++ b/tensorflow/core/kernels/mkl_requantize_per_channel_op.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -48,31 +48,31 @@ class MklRequantizePerChannelOp : public OpKernel { void Compute(OpKernelContext* ctx) override { try { const Tensor& input = ctx->input(kInputTensorIndex); - const Tensor& input_min_vec = ctx->input(kInputMinVec); + const Tensor& input_min_vec = ctx->input(kInputMinVecIndex); float* input_min_vec_data = (float*)const_cast( static_cast(input_min_vec.flat().data())); - const Tensor& input_max_vec = ctx->input(kInputMaxVec); + const Tensor& input_max_vec = ctx->input(kInputMaxVecIndex); float* input_max_vec_data = (float*)const_cast( static_cast(input_max_vec.flat().data())); - const Tensor& input_requested_min = ctx->input(this->kRequestMin); + const Tensor& input_requested_min = ctx->input(this->kRequestMinIndex); const float input_requested_min_float = input_requested_min.flat()(0); - const Tensor& input_requested_max = ctx->input(this->kRequestMax); + const Tensor& input_requested_max = ctx->input(this->kRequestMaxIndex); const float input_requested_max_float = input_requested_max.flat()(0); size_t depth = input_min_vec.NumElements(); OP_REQUIRES( ctx, input_min_vec.dim_size(0) == depth, - errors::InvalidArgument("min has incorrect size, expected ", depth, + errors::InvalidArgument("input_min has incorrect size, expected ", depth, " was ", input_min_vec.dim_size(0))); OP_REQUIRES( ctx, input_max_vec.dim_size(0) == depth, - errors::InvalidArgument("max has incorrect size, expected ", depth, + errors::InvalidArgument("input_max has incorrect size, expected ", depth, " was ", input_max_vec.dim_size(0))); - if (out_type_ == DT_QINT8) CHECK(input_requested_min_float < 0.0f); + if (out_type_ == DT_QINT8) DCHECK(input_requested_min_float < 0.0f); const float factor = (out_type_ == DT_QINT8) ? 
127.0f : 255.0f; float requested_min_max = std::max(std::abs(input_requested_min_float), @@ -82,12 +82,11 @@ class MklRequantizePerChannelOp : public OpKernel { input.shape(), &output)); std::vector scales(depth); - for (int i = 0; i < depth; i++) { + for (int i = 0; i < depth; ++i) { float min_max_from_vec = std::max(std::abs(input_min_vec_data[i]), std::abs(input_max_vec_data[i])); - float scale = + scales[i] = factor * (min_max_from_vec / requested_min_max / (float)(1L << 31)); - scales[i] = scale; } mkldnn::primitive_attr reorder_attr; @@ -133,8 +132,8 @@ class MklRequantizePerChannelOp : public OpKernel { Tensor* output_min = nullptr; Tensor* output_max = nullptr; - OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMin, {}, &output_min)); - OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMax, {}, &output_max)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMinIndex, {}, &output_min)); + OP_REQUIRES_OK(ctx, ctx->allocate_output(kOutputMaxIndex, {}, &output_max)); output_min->flat()(0) = input_requested_min_float; output_max->flat()(0) = input_requested_max_float; @@ -150,13 +149,13 @@ class MklRequantizePerChannelOp : public OpKernel { private: const int kInputTensorIndex = 0; - const int kInputMinVec = 1; - const int kInputMaxVec = 2; - const int kRequestMin = 3; - const int kRequestMax = 4; + const int kInputMinVecIndex = 1; + const int kInputMaxVecIndex = 2; + const int kRequestMinIndex = 3; + const int kRequestMaxIndex = 4; const int kOutputTensorIndex = 0; - const int kOutputMin = 1; - const int kOutputMax = 2; + const int kOutputMinIndex = 1; + const int kOutputMaxIndex = 2; DataType out_type_; engine cpu_engine_ = engine(engine::cpu, 0); }; -- GitLab From 24ce9f5bb519a10a6f431a425487561ffbceda2e Mon Sep 17 00:00:00 2001 From: Dayananda-V Date: Mon, 28 Jan 2019 16:25:20 +0530 Subject: [PATCH 0031/1185] TF Framework error_test missing test case add 1-error_code_from_exception_type api test case --- tensorflow/python/framework/errors_impl.py | 6 +++++- 
tensorflow/python/framework/errors_test.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py index 922b9e2bd3..c473dfeedf 100644 --- a/tensorflow/python/framework/errors_impl.py +++ b/tensorflow/python/framework/errors_impl.py @@ -511,7 +511,11 @@ def exception_type_from_error_code(error_code): @tf_export("errors.error_code_from_exception_type") def error_code_from_exception_type(cls): - return _EXCEPTION_CLASS_TO_CODE[cls] + try: + return _EXCEPTION_CLASS_TO_CODE[cls] + except KeyError: + warnings.warn("Unknown class exception") + return UnknownError(None, None, "Unknown class exception", None) def _make_specific_exception(node_def, op, message, error_code): diff --git a/tensorflow/python/framework/errors_test.py b/tensorflow/python/framework/errors_test.py index 574b126cae..c044202d92 100644 --- a/tensorflow/python/framework/errors_test.py +++ b/tensorflow/python/framework/errors_test.py @@ -70,6 +70,10 @@ class ErrorsTest(test.TestCase): isinstance( errors_impl._make_specific_exception(None, None, None, error_code), exc_type)) + # error_code_from_exception_type and exception_type_from_error_code should + # be consistent with operation result. 
+ self.assertEqual(error_code, + errors_impl.error_code_from_exception_type(exc_type)) # pylint: enable=protected-access def testKnownErrorClassForEachErrorCodeInProto(self): @@ -98,6 +102,14 @@ class ErrorsTest(test.TestCase): self.assertTrue("Unknown error code: 37" in str(w[0].message)) self.assertTrue(isinstance(exc, errors_impl.OpError)) + with warnings.catch_warnings(record=True) as w: + # pylint: disable=protected-access + exc = errors_impl.error_code_from_exception_type("Unknown") + # pylint: enable=protected-access + self.assertEqual(1, len(w)) + self.assertTrue("Unknown class exception" in str(w[0].message)) + self.assertTrue(isinstance(exc, errors_impl.OpError)) + def testStatusDoesNotLeak(self): try: with errors.raise_exception_on_not_ok_status() as status: -- GitLab From a6031618c3e71ab4a66faf993f4b96e7cc717cca Mon Sep 17 00:00:00 2001 From: Sergii Khomenko Date: Wed, 30 Jan 2019 22:41:00 +0100 Subject: [PATCH 0032/1185] Fix a minor typo --- tensorflow/python/keras/saving/saved_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/saving/saved_model.py b/tensorflow/python/keras/saving/saved_model.py index fbf0bf68ef..a86555703a 100644 --- a/tensorflow/python/keras/saving/saved_model.py +++ b/tensorflow/python/keras/saving/saved_model.py @@ -52,7 +52,7 @@ def export( `save_model` generates new files/folders under the `saved_model_path` folder: 1) a checkpoint containing the model weights. 2) a saved_model.pb file containing the model's MetaGraphs. The prediction - graph is always exported. The evaluaton and training graphs are exported + graph is always exported. The evaluation and training graphs are exported if the following conditions are met: - Evaluation: model loss is defined. - Training: model is compiled with an optimizer defined under `tf.train`. 
-- GitLab From 0b92bc4b44e795ec3aa6b66295abec0770e9063f Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 30 Jan 2019 23:10:35 +0100 Subject: [PATCH 0033/1185] Bazel: Remove deprecated experimental_shortened_obj_file_path option `experimental_shortened_obj_file_path` is depreacted since [version `0.17.0`](https://blog.bazel.build/2018/09/14/bazel-0.17.html) and activated by default. Since Tensorflow requires [at least bazel `0.19.0`](https://github.com/tensorflow/tensorflow/blob/master/configure.py#L1559), this flag can be savely removed. --- configure.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/configure.py b/configure.py index 8dcd318220..e626082ce0 100644 --- a/configure.py +++ b/configure.py @@ -1513,10 +1513,6 @@ def set_windows_build_flags(environ_cp): # The host and target platforms are the same in Windows build. So we don't # have to distinct them. This avoids building the same targets twice. write_to_bazelrc('build --distinct_host_configuration=false') - # Enable short object file path to avoid long path issue on Windows. - # TODO(pcloudy): Remove this flag when upgrading Bazel to 0.16.0 - # Short object file path will be enabled by default. 
- write_to_bazelrc('build --experimental_shortened_obj_file_path=true') if get_var( environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline', -- GitLab From 7b4860446e610cc704d44f077c40f931b2971ded Mon Sep 17 00:00:00 2001 From: Dayananda-V Date: Thu, 31 Jan 2019 10:45:39 +0530 Subject: [PATCH 0034/1185] TF Framework ops_test missing test case add 1-has_default_graph api test case 2-get_all_collection_keys api test case --- tensorflow/python/framework/ops_test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 8347e9d1eb..10a2ce9bde 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -1587,6 +1587,8 @@ class CollectionTest(test_util.TensorFlowTestCase): self.assertSequenceEqual(g.collections, ["key"]) g.add_to_collection("other", "foo") self.assertSequenceEqual(sorted(g.collections), ["key", "other"]) + self.assertSequenceEqual(sorted(g.get_all_collection_keys()), + ["key", "other"]) def test_add_to_collection(self): g = ops.Graph() @@ -2408,17 +2410,22 @@ class GraphTest(test_util.TensorFlowTestCase): def testDefaultGraph(self): orig = ops.get_default_graph() + self.assertFalse(ops.has_default_graph()) self._AssertDefault(orig) g0 = ops.Graph() + self.assertFalse(ops.has_default_graph()) self._AssertDefault(orig) context_manager_0 = g0.as_default() + self.assertFalse(ops.has_default_graph()) self._AssertDefault(orig) with context_manager_0 as g0: self._AssertDefault(g0) with ops.Graph().as_default() as g1: + self.assertTrue(ops.has_default_graph()) self._AssertDefault(g1) self._AssertDefault(g0) self._AssertDefault(orig) + self.assertFalse(ops.has_default_graph()) def testPreventFeeding(self): g = ops.Graph() -- GitLab From 058aa720d063795fbe3fa2c9aa26e95790fa349f Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Thu, 31 Jan 2019 15:48:13 +0900 Subject: [PATCH 0035/1185] added complex support for decode_raw 
--- tensorflow/core/kernels/decode_raw_op.cc | 2 ++ tensorflow/core/ops/parsing_ops.cc | 5 +++- .../python/kernel_tests/decode_raw_op_test.py | 26 +++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/decode_raw_op.cc b/tensorflow/core/kernels/decode_raw_op.cc index eaef5a6097..3dd019c3d2 100644 --- a/tensorflow/core/kernels/decode_raw_op.cc +++ b/tensorflow/core/kernels/decode_raw_op.cc @@ -110,6 +110,8 @@ REGISTER(uint8); REGISTER(int16); REGISTER(int8); REGISTER(int64); +REGISTER(complex64); +REGISTER(complex128); #undef REGISTER diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc index eff453241d..169076a6f6 100644 --- a/tensorflow/core/ops/parsing_ops.cc +++ b/tensorflow/core/ops/parsing_ops.cc @@ -26,7 +26,10 @@ using shape_inference::ShapeHandle; REGISTER_OP("DecodeRaw") .Input("bytes: string") .Output("output: out_type") - .Attr("out_type: {half,float,double,int32,uint16,uint8,int16,int8,int64}") + .Attr( + "out_type: " + "{half,float,double,int32,uint16,uint8,int16,int8,int64,complex64," + "complex128}") .Attr("little_endian: bool = true") .SetShapeFn([](InferenceContext* c) { // Note: last dimension is data dependent. 
diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py index 008e59ba3e..bb8d2cf6a0 100644 --- a/tensorflow/python/kernel_tests/decode_raw_op_test.py +++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py @@ -89,6 +89,32 @@ class DecodeRawOpTest(test.TestCase): self.assertAllEqual(expected_result, result) + @test_util.run_deprecated_v1 + def testToComplex64(self): + with self.cached_session(): + in_bytes = array_ops.placeholder(dtypes.string, shape=[None]) + decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.complex64) + self.assertEqual([None, None], decode.get_shape().as_list()) + + expected_result = np.matrix([[1 + 1j, 2 - 2j, -3 + 3j, -4 - 4j]], + dtype=" Date: Thu, 31 Jan 2019 17:30:18 -0800 Subject: [PATCH 0036/1185] Improves docstring for tf.contrib.nn.conv1d_transpose --- tensorflow/python/ops/nn_ops.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 51c1f15bac..c06a0c2614 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -3988,21 +3988,22 @@ def conv1d_transpose( filter: A 3-D `Tensor` with the same type as `value` and shape `[filter_width, output_channels, in_channels]`. `filter`'s `in_channels` dimension must match that of `value`. - output_shape: A 1-D `Tensor` representing the output shape of the - deconvolution op. + output_shape: A 1-D `Tensor`, containing three elements, representing the + output shape of the deconvolution op. stride: An `integer`. The number of entries by which the filter is moved right at each step. padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm. See the "returns" section of `tf.nn.convolution` for details. - data_format: A string. 'NHWC' and 'NCHW' are supported. + data_format: A string. `'NWC'` and `'NCW'` are supported. name: Optional name for the returned tensor. 
Returns: A `Tensor` with the same type as `value`. Raises: - ValueError: If input/output depth does not match `filter`'s shape, or if - padding is other than `'VALID'` or `'SAME'`. + ValueError: If input/output depth does not match `filter`'s shape, if + `output_shape` is not at 3-element vector, if `padding` is other than + `'VALID'` or `'SAME'`, or if `data_format` is invalid. """ with ops.name_scope(name, "conv1d_transpose", [value, filter, output_shape]) as name: -- GitLab From 5679c6f03a5f0053e8946c61da99eb3480e133aa Mon Sep 17 00:00:00 2001 From: Karl Weinmeister <11586922+kweinmeister@users.noreply.github.com> Date: Fri, 1 Feb 2019 16:11:47 -0600 Subject: [PATCH 0037/1185] Add example code to tf.data.Dataset.filter() documentation --- tensorflow/python/data/ops/dataset_ops.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 766c6d5395..6f90b52e69 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1100,6 +1100,21 @@ class DatasetV2(object): def filter(self, predicate): """Filters this dataset according to `predicate`. + ```python + # NOTE: The following examples use `{ ... }` to represent the + # contents of a dataset. 
+ a = { 1, 2, 3 } + b = { 4, 5, 6, 7 } + + a.filter(lambda x: x < 3) == { 1, 2 } + + # `tf.math.equal(x, y)` is required for equality comparison + def filter_fn(x): + return tf.math.equal(x, 4) + + b.filter(filter_fn) == { 4 } + ``` + Args: predicate: A function mapping a nested structure of tensors (having shapes and types defined by `self.output_shapes` and `self.output_types`) to a -- GitLab From 131270d780a85ccb97e1f7c6e1c92dcee176bc28 Mon Sep 17 00:00:00 2001 From: Karl Weinmeister <11586922+kweinmeister@users.noreply.github.com> Date: Fri, 1 Feb 2019 19:57:33 -0600 Subject: [PATCH 0038/1185] Changed tensor pseudo-code so that the example is runnable --- tensorflow/python/data/ops/dataset_ops.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 6f90b52e69..338cfb331e 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1101,18 +1101,15 @@ class DatasetV2(object): """Filters this dataset according to `predicate`. ```python - # NOTE: The following examples use `{ ... }` to represent the - # contents of a dataset. 
- a = { 1, 2, 3 } - b = { 4, 5, 6, 7 } - - a.filter(lambda x: x < 3) == { 1, 2 } + d = tf.data.Dataset.from_tensor_slices([1, 2, 3]) + + d = d.filter(lambda x: x < 3) # [1, 2] # `tf.math.equal(x, y)` is required for equality comparison def filter_fn(x): - return tf.math.equal(x, 4) + return tf.math.equal(x, 1) - b.filter(filter_fn) == { 4 } + d = d.filter(filter_fn) # [1] ``` Args: -- GitLab From 6477b46119ddc8e4b6a0fb1b399a96cd98f3f904 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Fri, 1 Feb 2019 18:11:53 -0800 Subject: [PATCH 0039/1185] Addressed review comments --- tensorflow/core/kernels/mkl_concat_op.cc | 31 +++++++++---------- .../kernels/mkl_quantized_concat_op_test.cc | 26 ++++++++-------- tensorflow/core/ops/mkl_array_ops.cc | 8 ++--- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index a955a90990..ba6c0f2389 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -228,9 +228,9 @@ class MklConcatOp : public OpKernel { OpInputList input_mins, input_maxes; if (std::is_same::value || std::is_same::value) { - // MKL DNN concat does not support input tensors that have different - // ranges, check if the ranges of the all input tensors are the same - // if not, forward it to Eigen implementation. + // MKL-DNN concat does not support input tensors that have different + // ranges. Check if the ranges of the all input tensors are the same. + // If not, forward it to Eigen implementation. 
OP_REQUIRES_OK(context, context->input_list("input_mins", &input_mins)); OP_REQUIRES(context, (input_mins.size() == N), @@ -247,7 +247,7 @@ class MklConcatOp : public OpKernel { float input_min = input_mins[0].flat()(0); float input_max = input_maxes[0].flat()(0); const float eps = 1.0e-6; - for (int i = 1; i < N; i++) { + for (int i = 1; i < N; ++i) { float min = input_mins[i].flat()(0); float max = input_maxes[i].flat()(0); @@ -260,19 +260,16 @@ class MklConcatOp : public OpKernel { // Call Eigen library if (invoke_eigen) { - if (std::is_same::value || std::is_same::value) { - // MKL DNN quantized concat does not support input tensors with - // different ranges. - // TODO (mabuzain): Add quantized version of CallEigen() to support - // this case. - OP_REQUIRES(context, false, - errors::Unimplemented("MKL DNN quantized concat does not " - "support input tensors that have " - "different ranges")); - } else { - CallEigenVersion(context, input_tensors, mkl_input_shapes); - } - + // MKL-DNN quantized concat does not support input tensors with + // different ranges. + // TODO (mabuzain): Add quantized version of CallEigen() to support + // this case. + OP_REQUIRES(context, (!std::is_same::value && + !std::is_same::value), + errors::Unimplemented("MKL DNN quantized concat does not " + "support input tensors that have " + "different ranges")); + CallEigenVersion(context, input_tensors, mkl_input_shapes); return; } diff --git a/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc b/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc index f9098f960a..d483b85d48 100644 --- a/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc +++ b/tensorflow/core/kernels/mkl_quantized_concat_op_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -41,12 +41,12 @@ namespace tensorflow { using test::graph::Constant; -// Helper class for converting MKL tesnors to TF tensors and comparing to -// expected values - static const uint8 dummy_tensor[] = {0, 0, 0, 0, 0, 0, 0, 0}; static const TensorShape dummy_shape({8}); +// Helper class for converting MKL tensors to TF tensors and comparing to +// expected values + class ConvMklToTF : public OpsTestBase { public: template @@ -55,7 +55,7 @@ class ConvMklToTF : public OpsTestBase { // Create an MKL to TF conversion node and execute it TF_EXPECT_OK(NodeDefBuilder("mkl_to_tf_op", "_MklToTf") .Input(FakeInput(dtype)) // Input - .Input(FakeInput(DT_UINT8)) // Mkl second tensor + .Input(FakeInput(DT_UINT8)) // MKL second tensor .Attr("T", dtype) .Attr("_kernel", "MklOp") .Finalize(node_def())); @@ -91,10 +91,10 @@ void QuantizedConcatTest::TestSmall8Bit(float first_min, float first_max, .Input(FakeInput(DT_INT32)) .Input(FakeInput(2, DT_FLOAT)) .Input(FakeInput(2, DT_FLOAT)) - .Input(FakeInput(2, DT_UINT8)) // MKl second tensor - .Input(FakeInput(DT_UINT8)) // MKl second tensor - .Input(FakeInput(2, DT_UINT8)) // MKl second tensor - .Input(FakeInput(2, DT_UINT8)) // MKl second tensor + .Input(FakeInput(2, DT_UINT8)) // MKL second tensor + .Input(FakeInput(DT_UINT8)) // MKL second tensor + .Input(FakeInput(2, DT_UINT8)) // MKL second tensor + .Input(FakeInput(2, DT_UINT8)) // MKL second tensor .Attr("N", 2) .Attr("T", DataTypeToEnum::v()) .Attr("Tidx", DT_INT32) @@ -164,10 +164,10 @@ void QuantizedConcatTest::TestSecondDim8Bit(float first_min, float first_max, .Input(FakeInput(DT_INT32)) .Input(FakeInput(2, DT_FLOAT)) .Input(FakeInput(2, DT_FLOAT)) - .Input(FakeInput(2, DT_UINT8)) // MKl second tensor - .Input(FakeInput(DT_UINT8)) // MKl second tensor - .Input(FakeInput(2, DT_UINT8)) // MKl second tensor - .Input(FakeInput(2, DT_UINT8)) // MKl second tensor + .Input(FakeInput(2, DT_UINT8)) // MKL second tensor + .Input(FakeInput(DT_UINT8)) // MKL second tensor + 
.Input(FakeInput(2, DT_UINT8)) // MKL second tensor + .Input(FakeInput(2, DT_UINT8)) // MKL second tensor .Attr("N", 2) .Attr("T", DataTypeToEnum::v()) .Attr("Tidx", DT_INT32) diff --git a/tensorflow/core/ops/mkl_array_ops.cc b/tensorflow/core/ops/mkl_array_ops.cc index ad15d0bf88..0279401289 100644 --- a/tensorflow/core/ops/mkl_array_ops.cc +++ b/tensorflow/core/ops/mkl_array_ops.cc @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,8 +33,8 @@ using shape_inference::InferenceContext; using shape_inference::ShapeHandle; using shape_inference::UnchangedShape; -// Adding QuantizedConcatV2 which is similar to the existing QuantizedConcat -// op to be able to replace it by _MklQuantizedConcatV2 in the graph rewrite. +// Adding QuantizedConcatV2 op to be able to replace it by +// _MklQuantizedConcatV2 in the graph rewrite. 
REGISTER_OP("QuantizedConcatV2") .Input("values: N * T") .Input("axis: Tidx") @@ -89,4 +89,4 @@ REGISTER_OP("_MklQuantizedConcatV2") }); } -#endif \ No newline at end of file +#endif -- GitLab From 2df4ae7033d57d6e61a30a36d1b3ea12ff223bb0 Mon Sep 17 00:00:00 2001 From: nammbash Date: Fri, 1 Feb 2019 21:41:49 -0800 Subject: [PATCH 0040/1185] review comments 2a --- ...pi_def_RequantizationRangePerChannel.pbtxt | 44 +++++++++++++ .../api_def_RequantizePerChannel.pbtxt | 62 +++++++++++++++++++ ...mkl_requantization_range_per_channel_op.cc | 31 +++++++--- .../core/kernels/mkl_requantize_ops_test.cc | 46 +++++++------- .../kernels/mkl_requantize_per_channel_op.cc | 2 +- 5 files changed, 152 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt index 2226027a42..bf80a35691 100644 --- a/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_RequantizationRangePerChannel.pbtxt @@ -1,4 +1,48 @@ op { graph_op_name: "RequantizationRangePerChannel" visibility : HIDDEN + in_arg { + name: "input" + description: <