From 926259c411c1022812ffb7fe88ca61f0180bd778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 09:51:09 +0800 Subject: [PATCH 0001/1262] TST: test case for string --- tensorflow/python/kernel_tests/scatter_nd_ops_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 9f57949515..83d69c651a 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -364,6 +364,16 @@ class ScatterNdTest(test.TestCase): del input_ # input_ is not used in scatter_nd return array_ops.scatter_nd(indices, updates, shape) + def testString(self): + indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32) + updates = constant_op.constant(["four", "three", "one", "seven"], dtype=dtypes.string) + expected = np.array(["", "one", "", "three", "four", "", "", "seven"]) + scatter = self.scatter_nd(indices, updates, shape=(8,)) + + with self.test_session() as sess: + result = sess.run(scatter) + self.assertTrue(np.array_equal(result, expected)) + def testRank3ValidShape(self): indices = array_ops.zeros([2, 2, 2], dtypes.int32) updates = array_ops.zeros([2, 2, 2], dtypes.int32) -- GitLab From 005840c6e2d2a4c25ecd293162a38a79dedf1a4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 10:06:44 +0800 Subject: [PATCH 0002/1262] ENH: supports string for cpu --- tensorflow/core/kernels/scatter_nd_op.cc | 1 + tensorflow/core/kernels/scatter_nd_op_cpu_impl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 3a95dd1773..0caa7bd317 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -241,6 +241,7 @@ class ScatterNdUpdateOp : 
public OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_ADD_SUB_CPU); TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_UPDATE_CPU); TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_CPU); +TF_CALL_string(REGISTER_SCATTER_ND_CPU); // Registers GPU kernels. #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h index cffc326174..155d354d85 100644 --- a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h +++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h @@ -160,6 +160,7 @@ struct ScatterNdFunctor { REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB); TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE); +REGISTER_SCATTER_ND_INDEX(string, scatter_nd_op::UpdateOp::ADD); TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_MATH) #undef REGISTER_SCATTER_ND_MATH -- GitLab From d887d2bcfc819034b17e812a9a60460e2d61e447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 12:14:40 +0800 Subject: [PATCH 0003/1262] TST: ignore NonAliasingAdd --- tensorflow/python/kernel_tests/scatter_nd_ops_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 83d69c651a..03b2f892c6 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -594,6 +594,10 @@ class ScatterNdNonAliasingAddTest(ScatterNdTest): shape, dtype=updates.dtype)) return array_ops.scatter_nd_non_aliasing_add(input_, indices, updates) + def testString(self): + # Not supported yet. 
+ pass + if __name__ == "__main__": test.main() -- GitLab From 4b697e0d9472215c706bdb36bb72986cdce78edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 13:51:34 +0800 Subject: [PATCH 0004/1262] DOC: modify document --- tensorflow/core/ops/array_ops.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 5a31f433ce..933ebe6b63 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -5332,12 +5332,13 @@ REGISTER_OP("ScatterNd") .Attr("Tindices: {int32, int64}") .SetShapeFn(ScatterNdShape) .Doc(R"doc( -Scatter `updates` into a new (initially zero) tensor according to `indices`. +Scatter `updates` into a new (initially zero for numeric, empty for string) +tensor according to `indices`. -Creates a new tensor by applying sparse `updates` to individual -values or slices within a zero tensor of the given `shape` according to -indices. This operator is the inverse of the @{tf.gather_nd} operator which -extracts values or slices from a given tensor. +Creates a new tensor by applying sparse `updates` to individual values or +slices within a zero (or empty string) tensor of the given `shape` +according to indices. This operator is the inverse of the @{tf.gather_nd} +operator which extracts values or slices from a given tensor. **WARNING**: The order in which updates are applied is nondeterministic, so the output will be nondeterministic if `indices` contains duplicates. 
-- GitLab From 597403e03680d69b72dbfa669f7bbdc77ce21ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 20 Dec 2017 16:34:48 +0800 Subject: [PATCH 0005/1262] CLN: conform docstring --- tensorflow/core/ops/array_ops.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 933ebe6b63..89b6eb7162 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -5332,13 +5332,12 @@ REGISTER_OP("ScatterNd") .Attr("Tindices: {int32, int64}") .SetShapeFn(ScatterNdShape) .Doc(R"doc( -Scatter `updates` into a new (initially zero for numeric, empty for string) -tensor according to `indices`. +Scatter `updates` into a new empty tensor according to `indices`. Creates a new tensor by applying sparse `updates` to individual values or -slices within a zero (or empty string) tensor of the given `shape` -according to indices. This operator is the inverse of the @{tf.gather_nd} -operator which extracts values or slices from a given tensor. +slices within a tensor (initially zero for numeric, empty for string) of +the given `shape` according to indices. This operator is the inverse of the +@{tf.gather_nd} operator which extracts values or slices from a given tensor. **WARNING**: The order in which updates are applied is nondeterministic, so the output will be nondeterministic if `indices` contains duplicates. 
-- GitLab From e2a0db74cfa4ed73692ec5d0af944660bb4b688c Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Tue, 6 Feb 2018 17:52:07 -0800 Subject: [PATCH 0006/1262] Python3 support of docs generation --- tensorflow/docs_src/community/documentation.md | 18 +++--------------- tensorflow/tools/docs/BUILD | 2 +- tensorflow/tools/docs/build_docs_test.py | 4 ---- tensorflow/tools/docs/generate_lib.py | 2 -- tensorflow/tools/docs/generate_lib_test.py | 3 --- tensorflow/tools/docs/parser.py | 4 ++-- tensorflow/tools/docs/parser_test.py | 4 ---- tensorflow/tools/docs/pretty_docs.py | 12 ++++++------ tensorflow/workspace.bzl | 11 ----------- 9 files changed, 12 insertions(+), 48 deletions(-) diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index 003e0a25ec..8d55148e48 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -148,19 +148,7 @@ viewing. Do not include url parameters in the source code URL. Before building the documentation, you must first set up your environment by doing the following: -1. If pip isn't installed on your machine, install it now by issuing the -following command: - - $ sudo easy_install pip - -2. Use pip to install codegen, mock, and pandas by issuing the following - command (Note: If you are using - a [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage your - dependencies, you may not want to use sudo for these installations): - - $ sudo pip install codegen mock pandas - -3. If bazel is not installed on your machine, install it now. If you are on +1. If bazel is not installed on your machine, install it now. If you are on Linux, install bazel by issuing the following command: $ sudo apt-get install bazel # Linux @@ -168,10 +156,10 @@ following command: If you are on Mac OS, find bazel installation instructions on [this page](https://bazel.build/versions/master/docs/install.html#mac-os-x). -4. 
Change directory to the top-level `tensorflow` directory of the TensorFlow +2. Change directory to the top-level `tensorflow` directory of the TensorFlow source code. -5. Run the `configure` script and answer its prompts appropriately for your +3. Run the `configure` script and answer its prompts appropriately for your system. $ ./configure diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index 8f10bc9e0c..cafa1f7eb3 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -37,7 +37,7 @@ py_library( srcs = ["parser.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], - deps = ["@com_github_andreif_codegen"], + deps = ["@astor_archive//:astor"], ) py_test( diff --git a/tensorflow/tools/docs/build_docs_test.py b/tensorflow/tools/docs/build_docs_test.py index ae293f6576..2e8f634e7c 100644 --- a/tensorflow/tools/docs/build_docs_test.py +++ b/tensorflow/tools/docs/build_docs_test.py @@ -39,10 +39,6 @@ class Flags(object): class BuildDocsTest(googletest.TestCase): def testBuildDocs(self): - if sys.version_info >= (3, 0): - print('Warning: Doc generation is not supported from python3.') - return - doc_generator = generate_lib.DocGenerator() doc_generator.set_py_modules([('tf', tf), ('tfdbg', tf_debug)]) diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index 003f972070..635408d87f 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -455,8 +455,6 @@ class DocGenerator(object): """Main entry point for generating docs.""" def __init__(self): - if sys.version_info >= (3, 0): - sys.exit('Doc generation is not supported from python3.') self.argument_parser = argparse.ArgumentParser() self._py_modules = None self._private_map = _get_default_private_map() diff --git a/tensorflow/tools/docs/generate_lib_test.py b/tensorflow/tools/docs/generate_lib_test.py index 1ceaf31f1c..ea6d28a02b 100644 --- 
a/tensorflow/tools/docs/generate_lib_test.py +++ b/tensorflow/tools/docs/generate_lib_test.py @@ -52,9 +52,6 @@ class DummyVisitor(object): class GenerateTest(googletest.TestCase): def test_write(self): - if sys.version_info >= (3, 0): - self.skipTest('Warning: Doc generation is not supported from python3.') - module = sys.modules[__name__] index = { diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 3db164c2b5..1798378d55 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -26,7 +26,7 @@ import os import re import sys -import codegen +import astor import six from google.protobuf.message import Message as ProtoMessage @@ -705,7 +705,7 @@ def _generate_signature(func, reverse_index): if id(default) in reverse_index: default_text = reverse_index[id(default)] elif ast_default is not None: - default_text = codegen.to_source(ast_default) + default_text = astor.to_source(ast_default) if default_text != repr(default): # This may be an internal name. If so, handle the ones we know about. # TODO(wicke): This should be replaced with a lookup in the index. 
diff --git a/tensorflow/tools/docs/parser_test.py b/tensorflow/tools/docs/parser_test.py index 8a0e9af521..7d2bf9177a 100644 --- a/tensorflow/tools/docs/parser_test.py +++ b/tensorflow/tools/docs/parser_test.py @@ -523,10 +523,6 @@ class TestParseFunctionDetails(googletest.TestCase): class TestGenerateSignature(googletest.TestCase): def test_known_object(self): - if sys.version_info >= (3, 0): - print('Warning: Doc generation is not supported from python3.') - return - known_object = object() reverse_index = {id(known_object): 'location.of.object.in.api'} diff --git a/tensorflow/tools/docs/pretty_docs.py b/tensorflow/tools/docs/pretty_docs.py index 543b5fa6fe..55ab5bdd49 100644 --- a/tensorflow/tools/docs/pretty_docs.py +++ b/tensorflow/tools/docs/pretty_docs.py @@ -101,7 +101,7 @@ def _build_class_page(page_info): link_template = '[`{short_name}`]({url})' parts.append(', '.join( - link_template.format(**base.__dict__) for base in page_info.bases)) + link_template.format(**base._asdict()) for base in page_info.bases)) parts.append('\n\n') @@ -159,7 +159,7 @@ def _build_class_page(page_info): h3 = ('

' '{short_name}' '

\n\n') - parts.append(h3.format(**method_info.__dict__)) + parts.append(h3.format(**method_info._asdict())) if method_info.signature is not None: parts.append(_build_signature(method_info, use_full_name=False)) @@ -217,7 +217,7 @@ def _build_module_page(page_info): template = '[`{short_name}`]({url}) module' for item in page_info.modules: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -229,7 +229,7 @@ def _build_module_page(page_info): template = '[`class {short_name}`]({url})' for item in page_info.classes: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -241,7 +241,7 @@ def _build_module_page(page_info): template = '[`{short_name}(...)`]({url})' for item in page_info.functions: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -254,7 +254,7 @@ def _build_module_page(page_info): parts.append('## Other Members\n\n') for item in page_info.other_members: - parts.append('`{short_name}`\n\n'.format(**item.__dict__)) + parts.append('`{short_name}`\n\n'.format(**item._asdict())) return ''.join(parts) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index eca744a920..4a2274eb1a 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -328,17 +328,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = str(Label("//third_party:backports_weakref.BUILD")), ) - tf_http_archive( - name = "com_github_andreif_codegen", - urls = [ - "https://mirror.bazel.build/github.com/andreif/codegen/archive/1.0.tar.gz", - "https://github.com/andreif/codegen/archive/1.0.tar.gz", - ], - sha256 = "2dadd04a2802de27e0fe5a19b76538f6da9d39ff244036afa00c1bba754de5ee", - strip_prefix = "codegen-1.0", - build_file = 
str(Label("//third_party:codegen.BUILD")), - ) - filegroup_external( name = "org_python_license", licenses = ["notice"], # Python 2.0 -- GitLab From 4f5d9a88f84e2261808bc986ece951e6e1d10725 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Tue, 6 Feb 2018 17:55:15 -0800 Subject: [PATCH 0007/1262] remove unused codegen.BUILD --- third_party/codegen.BUILD | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 third_party/codegen.BUILD diff --git a/third_party/codegen.BUILD b/third_party/codegen.BUILD deleted file mode 100644 index df436c8163..0000000000 --- a/third_party/codegen.BUILD +++ /dev/null @@ -1,16 +0,0 @@ -# -*- mode: python; -*- -# -# Description: -# Extension to ast that allow ast -> python code generation. - -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) # New BSD - -exports_files(["LICENSE"]) - -py_library( - name = "com_github_andreif_codegen", - srcs = glob(["codegen.py"]), - srcs_version = "PY2AND3", -) -- GitLab From 736e8c4ccb16718d11cf7c8e1fac843bf6e388a7 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Wed, 14 Feb 2018 18:26:20 +0900 Subject: [PATCH 0008/1262] fix typo --- tensorflow/core/lib/io/record_writer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/lib/io/record_writer.cc b/tensorflow/core/lib/io/record_writer.cc index 3657243c5d..ebc5648269 100644 --- a/tensorflow/core/lib/io/record_writer.cc +++ b/tensorflow/core/lib/io/record_writer.cc @@ -49,7 +49,7 @@ RecordWriterOptions RecordWriterOptions::CreateRecordWriterOptions( #endif // IS_SLIM_BUILD } else if (compression_type != compression::kNone) { LOG(ERROR) << "Unsupported compression_type:" << compression_type - << ". No comprression will be used."; + << ". 
No compression will be used."; } return options; } -- GitLab From 617fa4e5fa634270c36a2a8762e6ce96bd38f2f8 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Wed, 14 Feb 2018 18:35:31 +0900 Subject: [PATCH 0009/1262] fix typo --- tensorflow/contrib/makefile/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index b0228c5435..995230dfa8 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -155,7 +155,7 @@ CC_PREFIX=ccache tensorflow/contrib/makefile/build_all_android.sh -s tensorflow/ (add -T on subsequent builds to skip protobuf downloading/building) -#### Testing the the CUDA-enabled benchmark via adb: +#### Testing the CUDA-enabled benchmark via adb: Build binaries first as above, then run: ```bash -- GitLab From 15f3b920ad7eb7fcca3afee14d16049db2046d4b Mon Sep 17 00:00:00 2001 From: Nathan Luehr Date: Wed, 14 Feb 2018 16:27:23 -0800 Subject: [PATCH 0010/1262] Fix __shared__ types with non-empty constructor std::complex has a non-empty constructor (zero assignment) that is not compatible with CUDA __shared__ memory. This fixes current reliance on undefined behavior. (and removes an unnecessary run-time initialization). 
--- .../core/kernels/reduction_gpu_kernels.cu.h | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index 15ae4c1fc5..95a3e222b5 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -244,6 +244,33 @@ __global__ void RowReduceKernel( if (row < num_rows && lane == 0) out[row] = sum; } +template +struct storage_type { + T1 val; + __host__ __device__ storage_type() {} + __host__ __device__ operator T1() { return val; } + __host__ __device__ storage_type& operator=(const T1& in) { + val = in; + return *this; + } +}; + +template +struct storage_type> { + T2 real; + T2 imag; + __host__ __device__ storage_type() {} + __host__ __device__ operator std::complex() { + return std::complex(real, imag); + } + __host__ __device__ storage_type>& operator=( + const std::complex& in) { + real = in.real(); + imag = in.imag(); + return *this; + } +}; + // Works only if there are <= 16 columns // each warps sums over multiple rows at once template @@ -268,7 +295,7 @@ __global__ void ColumnReduceMax16ColumnsKernel( // 1D array necessary due to bug in CUDA 9 compiler. // TODO(nluehr) revert to 2D array when compiler is ready. - __shared__ value_type partial_sums[32 * 33]; + __shared__ storage_type partial_sums[32 * 33]; row += rows_per_warp * gridDim.y * blockDim.y; for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) { @@ -294,7 +321,8 @@ __global__ void ColumnReduceMax16ColumnsKernel( if (blockDim.y > 1) { for (int row = 1; row < blockDim.y; ++row) { - s = op(s, partial_sums[threadIdx.x * 33 + row]); + value_type t = partial_sums[threadIdx.x * 33 + row]; + s = op(s, t); } } @@ -316,7 +344,7 @@ __global__ void ColumnReduceKernel( // 1D array necessary due to bug in CUDA 9 compiler. // TODO(nluehr) revert to 2D array when compiler is ready. 
- __shared__ value_type partial_sums[32 * 33]; + __shared__ storage_type partial_sums[32 * 33]; row += gridDim.y * blockDim.y; @@ -347,7 +375,8 @@ __global__ void ColumnReduceKernel( min(blockDim.y, num_rows - blockIdx.y * blockDim.y); for (int row = 1; row < numRowsThisBlock; ++row) { - s = op(s, partial_sums[threadIdx.x * 33 + row]); + value_type t = partial_sums[threadIdx.x * 33 + row]; + s = op(s, t); } out[col * gridDim.y + blockIdx.y] = s; -- GitLab From b81aaac898d93e17b4a280bb02547d2a60d490cb Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 15 Feb 2018 08:28:12 +0000 Subject: [PATCH 0011/1262] Fix warnings in tf.contrib.bayesflow.monte_carlo.expectation This fix fixes several warnings in tf.contrib.bayesflow.monte_carlo.expectation by switching to keepdims for tf.reduce_mean. Signed-off-by: Yong Tang --- tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py index 985177e897..5263e87ae6 100644 --- a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py @@ -328,7 +328,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, if not callable(f): raise ValueError('`f` must be a callable function.') if use_reparametrization: - return math_ops.reduce_mean(f(samples), axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(f(samples), axis=axis, keepdims=keep_dims) else: if not callable(log_prob): raise ValueError('`log_prob` must be a callable function.') @@ -348,7 +348,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, # "Is there a floating point value of x, for which x-x == 0 is false?" # http://stackoverflow.com/q/2686644 fx += stop(fx) * (logpx - stop(logpx)) # Add zeros_like(logpx). 
- return math_ops.reduce_mean(fx, axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(fx, axis=axis, keepdims=keep_dims) def _sample_mean(values): -- GitLab From 9c272adf248228408448db6219b238145f5a02ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 16 Feb 2018 10:38:50 +0800 Subject: [PATCH 0012/1262] DOC: move doc to api def file --- .../core/api_def/base_api/api_def_ScatterNd.pbtxt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt index 4cb8c064fc..4e95895f54 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt @@ -25,12 +25,12 @@ A new tensor with the given shape and updates applied according to the indices. END } - summary: "Scatter `updates` into a new (initially zero) tensor according to `indices`." + summary: "Scatter `updates` into a new empty tensor according to `indices`." description: < Date: Mon, 19 Feb 2018 12:56:40 +0400 Subject: [PATCH 0013/1262] Add broadcasting functionality fro Div and Sub ops. 
--- tensorflow/contrib/lite/kernels/div.cc | 117 ++++++-- tensorflow/contrib/lite/kernels/div_test.cc | 174 ++++++++++++ .../internal/optimized/optimized_ops.h | 268 +++++++++++++++++- .../internal/reference/reference_ops.h | 257 +++++++++++++++++ tensorflow/contrib/lite/kernels/sub.cc | 135 +++++++-- tensorflow/contrib/lite/kernels/sub_test.cc | 213 ++++++++++++++ .../testing/generated_examples_zip_test.cc | 15 +- 7 files changed, 1122 insertions(+), 57 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/div_test.cc create mode 100644 tensorflow/contrib/lite/kernels/sub_test.cc diff --git a/tensorflow/contrib/lite/kernels/div.cc b/tensorflow/contrib/lite/kernels/div.cc index 44bd0dc85d..c77a0de9b7 100644 --- a/tensorflow/contrib/lite/kernels/div.cc +++ b/tensorflow/contrib/lite/kernels/div.cc @@ -37,7 +37,23 @@ constexpr int kInputTensor1 = 0; constexpr int kInputTensor2 = 1; constexpr int kOutputTensor = 0; +struct OpData { + bool requires_broadcast; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new OpData; + data->requires_broadcast = false; + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); @@ -45,35 +61,85 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2)); - for (int i = 0; i < NumDimensions(input1); ++i) { - TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i), - SizeOfDimension(input2, i)); - } + TF_LITE_ENSURE_EQ(context, input1->type, input2->type); + output->type = input2->type; + + data->requires_broadcast = 
!HaveSameShapes(input1, input2); - TF_LITE_ENSURE_EQ(context, input1->type, output->type); - TF_LITE_ENSURE_EQ(context, input2->type, output->type); + TfLiteIntArray* output_size = nullptr; + if (data->requires_broadcast) { + TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast( + context, input1, input2, &output_size)); + } else { + output_size = TfLiteIntArrayCopy(input1->dims); + } - TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims); return context->ResizeTensor(context, output, output_size); } template -void EvalDivFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteDivParams* params, TfLiteTensor* input1, - TfLiteTensor* input2, TfLiteTensor* output) { +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteDivParams* params, const OpData* data, + TfLiteTensor* input1, TfLiteTensor* input2, + TfLiteTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRangeFloat(params->activation, &output_activation_min, &output_activation_max); -#define TF_LITE_DIV(type) \ - type::Div(GetTensorData(input1), GetTensorDims(input1), \ - GetTensorData(input2), GetTensorDims(input2), \ - output_activation_min, output_activation_max, \ - GetTensorData(output), GetTensorDims(output)) +#define TF_LITE_DIV(type, opname) \ + type::opname(GetTensorData(input1), GetTensorDims(input1), \ + GetTensorData(input2), GetTensorDims(input2), \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + if (data->requires_broadcast) { + TF_LITE_DIV(reference_ops, BroadcastDiv); + } else { + TF_LITE_DIV(reference_ops, Div); + } + } else { + if (data->requires_broadcast) { + TF_LITE_DIV(optimized_ops, BroadcastDiv); + } else { + TF_LITE_DIV(optimized_ops, Div); + } + } +#undef TF_LITE_DIV +} + +template +void EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteDivParams* params, const OpData* data, + TfLiteTensor* input1, TfLiteTensor* input2, + 
TfLiteTensor* output) { + auto input1_offset = -input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + auto output_offset = output->params.zero_point; + + int32_t output_multiplier; + int output_shift; + + double real_multiplier = + input1->params.scale * input2->params.scale / output->params.scale; + QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, + &output_shift); + + int32 output_activation_min, output_activation_max; + CalculateActivationRangeUint8(params->activation, output, + &output_activation_min, &output_activation_max); + +#define TF_LITE_DIV(type, opname) \ + type::opname(GetTensorData(input1), GetTensorDims(input1), \ + input1_offset, GetTensorData(input2), \ + GetTensorDims(input2), input2_offset, output_offset, \ + output_multiplier, output_shift, output_activation_min, \ + output_activation_max, GetTensorData(output), \ + GetTensorDims(output)); + // The quantized version of Div doesn't support activations, so we + // always use BroadcastDiv. 
if (kernel_type == kReference) { - TF_LITE_DIV(reference_ops); + TF_LITE_DIV(reference_ops, BroadcastDiv); } else { - TF_LITE_DIV(optimized_ops); + TF_LITE_DIV(optimized_ops, BroadcastDiv); } #undef TF_LITE_DIV } @@ -81,15 +147,20 @@ void EvalDivFloat(TfLiteContext* context, TfLiteNode* node, template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); if (output->type == kTfLiteFloat32) { - EvalDivFloat(context, node, params, input1, input2, output); + EvalFloat(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteUInt8) { + EvalQuantized(context, node, params, data, input1, input2, + output); } else { - context->ReportError(context, "Inputs and outputs not all float types."); + context->ReportError(context, + "Div only supports FLOAT32 and quantized UINT8 now."); return kTfLiteError; } @@ -99,19 +170,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace div TfLiteRegistration* Register_DIV_REF() { - static TfLiteRegistration r = {nullptr, nullptr, div::Prepare, + static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, div::Eval}; return &r; } TfLiteRegistration* Register_DIV_GENERIC_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, div::Prepare, + static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, div::Eval}; return &r; } TfLiteRegistration* Register_DIV_NEON_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, div::Prepare, + static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, div::Eval}; return &r; } diff --git a/tensorflow/contrib/lite/kernels/div_test.cc b/tensorflow/contrib/lite/kernels/div_test.cc new file mode 100644 index 
0000000000..78918a0d79 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/div_test.cc @@ -0,0 +1,174 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseDivOpModel : public SingleOpModel { + public: + BaseDivOpModel(const TensorData& input1, const TensorData& input2, + const TensorData& output, + ActivationFunctionType activation_type) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_DIV, BuiltinOptions_DivOptions, + CreateDivOptions(builder_, activation_type).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + protected: + int input1_; + int input2_; + int output_; +}; + +class FloatDivOpModel : public BaseDivOpModel { + public: + using BaseDivOpModel::BaseDivOpModel; + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +// For quantized Div, the error shouldn't exceed (2*step + step^2). +// The param min=-1.0 & max=1.0 is used in the following tests. 
+// The tolerance value is ~0.0157. +const float kQuantizedStep = 2.0 / 255.0; +const float kQuantizedTolerance = + 2.0 * kQuantizedStep + kQuantizedStep * kQuantizedStep; + +class QuantizedDivOpModel : public BaseDivOpModel { + public: + using BaseDivOpModel::BaseDivOpModel; + + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +TEST(FloatDivOpTest, NoActivation) { + FloatDivOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-0.2, 0.2, -1.2, 0.8}); + m.PopulateTensor(m.input2(), {0.5, 0.2, -1.5, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-0.4, 1.0, 0.8, 1.6}))); +} + +TEST(FloatDivOpTest, ActivationRELU_N1_TO_1) { + FloatDivOpModel m( + {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU_N1_TO_1); + m.PopulateTensor(m.input1(), {-0.2, 0.2, -1.2, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, -1.5, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-1.0, 1.0, 0.8, 1.0}))); +} + +TEST(FloatDivOpTest, VariousInputShapes) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.3, 0.8, 1.1, -2.0}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.6, 0.5, -1.1, -0.1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-20.0, 1.0, 0.5, 1.6, -1.0, 20.0}))) + << "With shape number " << i; + } +} + +TEST(FloatDivOpTest, WithBroadcast) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < 
test_shapes.size(); ++i) { + FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, // always a scalar + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123}); + m.PopulateTensor(m.input2(), {0.1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-2.0, 2.0, 0.7, 0.8, 1.1, -1.23}))) + << "With shape number " << i; + } +} + +TEST(QuantizedDivOpTest, NoActivation) { + QuantizedDivOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-0.6, 0.2, 0.9, -0.7}); + m.QuantizeAndPopulate(m.input2(), {0.8, 0.4, 0.9, -0.8}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({-0.75, 0.5, 1.0, 0.875}, + kQuantizedTolerance))); +} + +// for quantized Div, the error shouldn't exceed 2*step +float GetTolerance(int min, int max) { + float kQuantizedStep = (max - min) / 255.0; + float kQuantizedTolerance = 2.0 * kQuantizedStep; + return kQuantizedTolerance; +} + +TEST(QuantizedDivOpTest, WithBroadcast) { + float kQuantizedTolerance = GetTolerance(-3.0, 3.0); + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + QuantizedDivOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, // always a scalar + {TensorType_UINT8, {}, -3.0, 3.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123}); + m.QuantizeAndPopulate(m.input2(), {0.1}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + {-2.0, 2.0, 0.7, 0.8, 1.1, -1.23}, kQuantizedTolerance))) + << "With shape number " << i; + } +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + 
::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index dec58fea4f..d12a3eca1d 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1928,6 +1928,126 @@ inline void Div(const float* input1_data, const Dims<4>& input1_dims, } } +// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO(benoitjacob): BroadcastDiv is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. 
+ // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] / + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + T output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + BroadcastDiv(input1_data, input1_dims, input2_data, input2_dims, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). 
+ // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 unclamped_result = + output_offset + + MultiplyByQuantizedMultiplierSmallerThanOne( + input1_val / input2_val, output_multiplier, output_shift); + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, unclamped_result)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + BroadcastDiv(input1_data, input1_dims, input1_offset, input2_data, + input2_dims, input2_offset, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_data, output_dims); +} + // TODO(aselle): This is not actually optimized yet. 
inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, @@ -1955,6 +2075,152 @@ inline void Sub(const float* input1_data, const Dims<4>& input1_dims, } } } + +// TODO(jiawen): We can implement BroadcastSub on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO(benoitjacob): BroadcastSub is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
+ for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] - + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + T output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + BroadcastSub(input1_data, input1_dims, input2_data, input2_dims, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. 
The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val - scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +template +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == 
FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + BroadcastSub(left_shift, input1_data, input1_dims, input1_offset, + input1_multiplier, input1_shift, input2_data, input2_dims, + input2_offset, input2_multiplier, input2_shift, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims); +} + template void Concatenation(int concat_dim, const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, @@ -2866,7 +3132,7 @@ inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); +gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); const int height = MatchingArraySize(input_dims, 2, output_dims, 2); const int width = MatchingArraySize(input_dims, 1, output_dims, 1); diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 5f4d5be323..c7b7687622 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1208,6 +1208,122 @@ inline void Div(const float* input1_data, const Dims<4>& input1_dims, } } +// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. 
The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. 
+ for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] / + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + T output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + BroadcastDiv(input1_data, input1_dims, input2_data, input2_dims, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. 
+ // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 unclamped_result = + output_offset + + MultiplyByQuantizedMultiplierSmallerThanOne( + input1_val / input2_val, output_multiplier, output_shift); + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, unclamped_result)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + BroadcastDiv(input1_data, input1_dims, input1_offset, input2_data, + input2_dims, input2_offset, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_data, output_dims); +} + inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, float output_activation_min, float output_activation_max, @@ -1235,6 +1351,147 @@ inline void Sub(const float* input1_data, const Dims<4>& input1_dims, } } +// TODO(jiawen): We can implement BroadcastSub on buffers of arbitrary +// 
dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
+ for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] - + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + T output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + BroadcastSub(input1_data, input1_dims, input2_data, input2_dims, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. 
The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val - scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +template +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == 
FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + BroadcastSub(left_shift, input1_data, input1_dims, input1_offset, + input1_multiplier, input1_shift, input2_data, input2_dims, + input2_offset, input2_multiplier, input2_shift, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims); +} + template void Concatenation(int concat_dim, const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, diff --git a/tensorflow/contrib/lite/kernels/sub.cc b/tensorflow/contrib/lite/kernels/sub.cc index ddaf498d5b..410585a293 100644 --- a/tensorflow/contrib/lite/kernels/sub.cc +++ b/tensorflow/contrib/lite/kernels/sub.cc @@ -26,7 +26,7 @@ namespace ops { namespace builtin { namespace sub { -// This file has three implementation of Div. +// This file has three implementation of Sub. 
enum KernelType { kReference, kGenericOptimized, // Neon-free @@ -37,7 +37,23 @@ constexpr int kInputTensor1 = 0; constexpr int kInputTensor2 = 1; constexpr int kOutputTensor = 0; +struct OpData { + bool requires_broadcast; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new OpData; + data->requires_broadcast = false; + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); @@ -45,51 +61,122 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2)); - for (int i = 0; i < NumDimensions(input1); ++i) { - TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i), - SizeOfDimension(input2, i)); - } + TF_LITE_ENSURE_EQ(context, input1->type, input2->type); + output->type = input2->type; - TF_LITE_ENSURE_EQ(context, input1->type, output->type); - TF_LITE_ENSURE_EQ(context, input2->type, output->type); + data->requires_broadcast = !HaveSameShapes(input1, input2); + + TfLiteIntArray* output_size = nullptr; + if (data->requires_broadcast) { + TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast( + context, input1, input2, &output_size)); + } else { + output_size = TfLiteIntArrayCopy(input1->dims); + } - TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims); return context->ResizeTensor(context, output, output_size); } template void EvalSubFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteSubParams* params, TfLiteTensor* input1, - TfLiteTensor* input2, TfLiteTensor* output) { + TfLiteSubParams* params, const OpData* data, + TfLiteTensor* input1, 
TfLiteTensor* input2, + TfLiteTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRangeFloat(params->activation, &output_activation_min, &output_activation_max); -#define TF_LITE_Sub(type) \ - type::Sub(GetTensorData(input1), GetTensorDims(input1), \ - GetTensorData(input2), GetTensorDims(input2), \ - output_activation_min, output_activation_max, \ - GetTensorData(output), GetTensorDims(output)) +#define TF_LITE_SUB(type, opname) \ + type::opname(GetTensorData(input1), GetTensorDims(input1), \ + GetTensorData(input2), GetTensorDims(input2), \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + if (data->requires_broadcast) { + TF_LITE_SUB(reference_ops, BroadcastSub); + } else { + TF_LITE_SUB(reference_ops, Sub); + } + } else { + if (data->requires_broadcast) { + TF_LITE_SUB(optimized_ops, BroadcastSub); + } else { + TF_LITE_SUB(optimized_ops, Sub); + } + } +#undef TF_LITE_SUB +} + +template +void EvalSubQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteSubParams* params, const OpData* data, + TfLiteTensor* input1, TfLiteTensor* input2, + TfLiteTensor* output) { + auto input1_offset = -input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + auto output_offset = output->params.zero_point; + const int left_shift = 20; + const double twice_max_input_scale = + 2 * std::max(input1->params.scale, input2->params.scale); + const double real_input1_multiplier = + input1->params.scale / twice_max_input_scale; + const double real_input2_multiplier = + input2->params.scale / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / ((1 << left_shift) * output->params.scale); + + int32 input1_multiplier; + int input1_shift; + QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier, + &input1_shift); + int32 input2_multiplier; + int input2_shift; + 
QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier, + &input2_shift); + int32 output_multiplier; + int output_shift; + QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier, + &output_shift); + + int32 output_activation_min, output_activation_max; + CalculateActivationRangeUint8(params->activation, output, + &output_activation_min, &output_activation_max); + +#define TF_LITE_SUB(type, opname) \ + type::opname(left_shift, GetTensorData(input1), \ + GetTensorDims(input1), input1_offset, input1_multiplier, \ + input1_shift, GetTensorData(input2), \ + GetTensorDims(input2), input2_offset, input2_multiplier, \ + input2_shift, output_offset, output_multiplier, output_shift, \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)); + // The quantized version of Sub doesn't support activations, so we + // always use BroadcastSub. if (kernel_type == kReference) { - TF_LITE_Sub(reference_ops); + TF_LITE_SUB(reference_ops, BroadcastSub); } else { - TF_LITE_Sub(optimized_ops); + TF_LITE_SUB(optimized_ops, BroadcastSub); } -#undef TF_LITE_Sub +#undef TF_LITE_SUB } template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); if (output->type == kTfLiteFloat32) { - EvalSubFloat(context, node, params, input1, input2, output); + EvalSubFloat(context, node, params, data, input1, input2, + output); + } else if (output->type == kTfLiteUInt8) { + EvalSubQuantized(context, node, params, data, input1, input2, + output); } else { - context->ReportError(context, "Inputs and outputs not all float types."); + context->ReportError(context, + "Inputs and outputs not all float|unit8 types."); return 
kTfLiteError; } @@ -99,19 +186,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace sub TfLiteRegistration* Register_SUB_REF() { - static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare, + static TfLiteRegistration r = {sub::Init, sub::Free, sub::Prepare, sub::Eval}; return &r; } TfLiteRegistration* Register_SUB_GENERIC_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare, + static TfLiteRegistration r = {sub::Init, sub::Free, sub::Prepare, sub::Eval}; return &r; } TfLiteRegistration* Register_SUB_NEON_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare, + static TfLiteRegistration r = {sub::Init, sub::Free, sub::Prepare, sub::Eval}; return &r; } diff --git a/tensorflow/contrib/lite/kernels/sub_test.cc b/tensorflow/contrib/lite/kernels/sub_test.cc new file mode 100644 index 0000000000..b2c6d05f62 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/sub_test.cc @@ -0,0 +1,213 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseSubOpModel : public SingleOpModel { + public: + BaseSubOpModel(const TensorData& input1, const TensorData& input2, + const TensorData& output, + ActivationFunctionType activation_type) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_Sub, BuiltinOptions_SubOptions, + CreateSubOptions(builder_, activation_type).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + protected: + int input1_; + int input2_; + int output_; +}; + +class FloatSubOpModel : public BaseSubOpModel { + public: + using BaseSubOpModel::BaseSubOpModel; + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +class QuantizedSubOpModel : public BaseSubOpModel { + public: + using BaseSubOpModel::BaseSubOpModel; + + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +// for quantized Sub, the error shouldn't exceed 2*step +float GetTolerance(int min, int max) { + float kQuantizedStep = (max - min) / 255.0; + float kQuantizedTolerance = 2.0 * kQuantizedStep; + return kQuantizedTolerance; +} + +TEST(FloatSubOpModel, NoActivation) { + FloatSubOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-2.1, 0.0, 1.4, 
-0.3})); +} + +TEST(FloatSubOpModel, ActivationRELU_N1_TO_1) { + FloatSubOpModel m( + {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU_N1_TO_1); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.0, 0.0, 1.0, -0.3})); +} + +TEST(FloatSubOpModel, VariousInputShapes) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatSubOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5, -1.1, 2.0}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.8, -1.1, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({-2.1, 0.0, 1.4, -0.3, 0.0, 1.9})) + << "With shape number " << i; + } +} + +TEST(FloatSubOpModel, WithBroadcast) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatSubOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, // always a scalar + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5, -1.1, 2.0}); + m.PopulateTensor(m.input2(), {0.5}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-2.5, -0.3, 1.2, 0.0, -1.6, 1.5}))) + << "With shape number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedTestsNoActivation) { + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::vector> inputs1 = { + {0.1, 0.2, 0.3, 0.4}, {-0.2, 0.2, 0.4, 0.7}, {-0.01, 0.2, 0.7, 0.3}}; + std::vector> inputs2 = { + {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.2}, {0.6, 0.4, -0.18, 0.5}}; + std::vector> results = { + {-0.5, -0.2, 0.0, 0.3}, {-0.8, -0.2, -0.1, 0.9}, {-0.61, 
-0.2, 0.88, -0.2}}; + for (int i = 0; i < inputs1.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + results[i], kQuantizedTolerance))) + << "With test number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedTestsActivationRELU_N1_TO_1) { + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::vector> inputs1 = {{-0.8, 0.2, 0.9, 0.7}, + {-0.8, 0.2, 0.7, 0.5}}; + std::vector> inputs2 = {{0.6, 0.4, 0.9, -0.8}, + {0.6, 0.4, -0.8, 0.3}}; + std::vector> results = {{-1.0, -0.2, 0.0, 1.0}, + {-1.0, -0.2, 1.0, 0.2}}; + for (int i = 0; i < inputs1.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_RELU_N1_TO_1); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + results[i], kQuantizedTolerance))) + << "With test number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedVariousInputShapes) { + float kQuantizedTolerance = GetTolerance(-3.0, 3.0); + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.QuantizeAndPopulate(m.input2(), {0.1, 0.3, 0.3, 0.5, 1.1, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + 
ElementsAreArray(ArrayFloatNear({-2.1, -0.1, 0.4, 0.3, 0.0, 1.9}, + kQuantizedTolerance))) + << "With shape number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedWithBroadcast) { + float kQuantizedTolerance = GetTolerance(-3.0, 3.0); + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.QuantizeAndPopulate(m.input2(), {0.7}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({-2.7, -0.5, 0.0, 0.1, 0.4, 1.3}, + kQuantizedTolerance))) + << "With shape number " << i; + } +} + +} // namespace +} // namespace tflite +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 49766cedac..1e177d5f6e 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -47,9 +47,6 @@ tensorflow::Env* env = tensorflow::Env::Default(); // Key is a substring of the test name and value is a bug number. // TODO(ahentz): make sure we clean this list up frequently. std::map kBrokenTests = { - // Sub and Div don't support broadcasting. - {R"(^\/diva.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"}, - {R"(^\/suba.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"}, // Add only supports float32. 
(and "constant" tests use Add) {R"(^\/adda.*int32)", "68808744"}, @@ -235,22 +232,23 @@ TEST_P(OpsTest, RunStuff) { INSTANTIATE_TESTS(add) INSTANTIATE_TESTS(avg_pool) -INSTANTIATE_TESTS(space_to_batch_nd) INSTANTIATE_TESTS(batch_to_space_nd) INSTANTIATE_TESTS(concat) INSTANTIATE_TESTS(constant) INSTANTIATE_TESTS(control_dep) INSTANTIATE_TESTS(conv) INSTANTIATE_TESTS(depthwiseconv) +INSTANTIATE_TESTS(div) INSTANTIATE_TESTS(exp) INSTANTIATE_TESTS(fully_connected) INSTANTIATE_TESTS(fused_batch_norm) INSTANTIATE_TESTS(gather) INSTANTIATE_TESTS(global_batch_norm) -INSTANTIATE_TESTS(l2norm) INSTANTIATE_TESTS(l2_pool) +INSTANTIATE_TESTS(l2norm) INSTANTIATE_TESTS(local_response_norm) INSTANTIATE_TESTS(max_pool) +INSTANTIATE_TESTS(mean) INSTANTIATE_TESTS(mul) INSTANTIATE_TESTS(pad) INSTANTIATE_TESTS(relu) @@ -260,14 +258,13 @@ INSTANTIATE_TESTS(reshape) INSTANTIATE_TESTS(resize_bilinear) INSTANTIATE_TESTS(sigmoid) INSTANTIATE_TESTS(softmax) +INSTANTIATE_TESTS(space_to_batch_nd) INSTANTIATE_TESTS(space_to_depth) -INSTANTIATE_TESTS(sub) INSTANTIATE_TESTS(split) -INSTANTIATE_TESTS(div) -INSTANTIATE_TESTS(transpose) -INSTANTIATE_TESTS(mean) INSTANTIATE_TESTS(squeeze) INSTANTIATE_TESTS(strided_slice) +INSTANTIATE_TESTS(sub) +INSTANTIATE_TESTS(transpose) } // namespace testing } // namespace tflite -- GitLab From 779d457008ab7ea2c11f4d73370099a1e56c0652 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Sun, 25 Feb 2018 21:39:52 +0900 Subject: [PATCH 0014/1262] fix typo --- .../python/kernel_tests/linalg/linear_operator_diag_test.py | 2 +- tensorflow/python/ops/linalg/linear_operator_diag.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py index 343d158498..8cb9f9e621 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py @@ -129,7 
+129,7 @@ class LinearOperatorDiagTest( with self.test_session() as sess: x = random_ops.random_normal(shape=(2, 2, 3, 4)) - # This LinearOperatorDiag will be brodacast to (2, 2, 3, 3) during solve + # This LinearOperatorDiag will be broadcast to (2, 2, 3, 3) during solve # and matmul with 'x' as the argument. diag = random_ops.random_uniform(shape=(2, 1, 3)) operator = linalg.LinearOperatorDiag(diag, is_self_adjoint=True) diff --git a/tensorflow/python/ops/linalg/linear_operator_diag.py b/tensorflow/python/ops/linalg/linear_operator_diag.py index b3ec3d5b7c..e180e83026 100644 --- a/tensorflow/python/ops/linalg/linear_operator_diag.py +++ b/tensorflow/python/ops/linalg/linear_operator_diag.py @@ -67,7 +67,7 @@ class LinearOperatorDiag(linear_operator.LinearOperator): operator = LinearOperatorDiag(diag) # Create a shape [2, 1, 4, 2] vector. Note that this shape is compatible - # since the batch dimensions, [2, 1], are brodcast to + # since the batch dimensions, [2, 1], are broadcast to # operator.batch_shape = [2, 3]. y = tf.random_normal(shape=[2, 1, 4, 2]) x = operator.solve(y) -- GitLab From b569035378ef4a8595c64e5f398d74244cac376e Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Sun, 25 Feb 2018 21:44:12 +0900 Subject: [PATCH 0015/1262] fix typo --- tensorflow/contrib/slim/python/slim/data/parallel_reader.py | 2 +- tensorflow/python/ops/distributions/special_math.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py index ad5e985487..b3343aef47 100644 --- a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py +++ b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py @@ -221,7 +221,7 @@ def parallel_read(data_sources, the data will be cycled through indefinitely. num_readers: a integer, number of Readers to create. reader_kwargs: an optional dict, of kwargs for the reader. 
- shuffle: boolean, wether should shuffle the files and the records by using + shuffle: boolean, whether should shuffle the files and the records by using RandomShuffleQueue as common_queue. dtypes: A list of types. The length of dtypes must equal the number of elements in each record. If it is None it will default to diff --git a/tensorflow/python/ops/distributions/special_math.py b/tensorflow/python/ops/distributions/special_math.py index bed4cbb2c1..1d605c5dfc 100644 --- a/tensorflow/python/ops/distributions/special_math.py +++ b/tensorflow/python/ops/distributions/special_math.py @@ -213,7 +213,7 @@ def _ndtri(p): # Compute x for p <= exp(-2): x = z - log(z)/z - (1/z) P(1/z) / Q(1/z), # where z = sqrt(-2. * log(p)), and P/Q are chosen between two different - # arrays based on wether p < exp(-32). + # arrays based on whether p < exp(-32). z = math_ops.sqrt(-2. * math_ops.log(sanitized_mcp)) first_term = z - math_ops.log(z) / z second_term_small_p = (_create_polynomial(1. / z, p2) -- GitLab From f1f70ef5c268d6ce41bdab4867ed0f2e19d6f924 Mon Sep 17 00:00:00 2001 From: Hovhannes Harutyunyan Date: Mon, 26 Feb 2018 10:52:11 +0400 Subject: [PATCH 0016/1262] Remove code that was written for compatibility with old checked-in code. Update code to have 80 characters per line. 
--- tensorflow/contrib/lite/kernels/div_test.cc | 3 +- .../internal/optimized/optimized_ops.h | 41 ------------------- .../internal/reference/reference_ops.h | 41 ------------------- tensorflow/contrib/lite/kernels/sub_test.cc | 18 +++++--- 4 files changed, 15 insertions(+), 88 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/div_test.cc b/tensorflow/contrib/lite/kernels/div_test.cc index 78918a0d79..e67e0ec034 100644 --- a/tensorflow/contrib/lite/kernels/div_test.cc +++ b/tensorflow/contrib/lite/kernels/div_test.cc @@ -154,7 +154,8 @@ TEST(QuantizedDivOpTest, WithBroadcast) { {TensorType_UINT8, {}, -3.0, 3.0}, // always a scalar {TensorType_UINT8, {}, -3.0, 3.0}, ActivationFunctionType_NONE); - m.QuantizeAndPopulate(m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123}); + m.QuantizeAndPopulate(m.input1(), {-0.2, 0.2, 0.07, + 0.08, 0.11, -0.123}); m.QuantizeAndPopulate(m.input2(), {0.1}); m.Invoke(); EXPECT_THAT(m.GetDequantizedOutput(), diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index d12a3eca1d..b19f46beaa 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1973,19 +1973,6 @@ void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { - T output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - BroadcastDiv(input1_data, input1_dims, input2_data, input2_dims, - output_activation_min, output_activation_max, output_data, - output_dims); -} - inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, int32 input1_offset, const 
uint8* input2_data, const Dims<4>& input2_dims, int32 input2_offset, @@ -2033,21 +2020,6 @@ inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, - int32 input1_offset, const uint8* input2_data, - const Dims<4>& input2_dims, int32 input2_offset, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - BroadcastDiv(input1_data, input1_dims, input1_offset, input2_data, - input2_dims, input2_offset, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_data, output_dims); -} - // TODO(aselle): This is not actually optimized yet. inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, @@ -2121,19 +2093,6 @@ void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { - T output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - BroadcastSub(input1_data, input1_dims, input2_data, input2_dims, - output_activation_min, output_activation_max, output_data, - output_dims); -} - inline void BroadcastSub(int left_shift, const uint8* input1_data, const Dims<4>& input1_dims, int32 input1_offset, int32 input1_multiplier, int input1_shift, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index c7b7687622..847075e207 100644 --- 
a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1249,19 +1249,6 @@ void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { - T output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - BroadcastDiv(input1_data, input1_dims, input2_data, input2_dims, - output_activation_min, output_activation_max, output_data, - output_dims); -} - inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, int32 input1_offset, const uint8* input2_data, const Dims<4>& input2_dims, int32 input2_offset, @@ -1309,21 +1296,6 @@ inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, - int32 input1_offset, const uint8* input2_data, - const Dims<4>& input2_dims, int32 input2_offset, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - BroadcastDiv(input1_data, input1_dims, input1_offset, input2_data, - input2_dims, input2_offset, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_data, output_dims); -} - inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, float output_activation_min, float output_activation_max, @@ -1392,19 +1364,6 @@ void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old 
checked-in code -template -void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { - T output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - BroadcastSub(input1_data, input1_dims, input2_data, input2_dims, - output_activation_min, output_activation_max, output_data, - output_dims); -} - inline void BroadcastSub(int left_shift, const uint8* input1_data, const Dims<4>& input1_dims, int32 input1_offset, int32 input1_multiplier, int input1_shift, diff --git a/tensorflow/contrib/lite/kernels/sub_test.cc b/tensorflow/contrib/lite/kernels/sub_test.cc index b2c6d05f62..1fd0ee2a0e 100644 --- a/tensorflow/contrib/lite/kernels/sub_test.cc +++ b/tensorflow/contrib/lite/kernels/sub_test.cc @@ -125,11 +125,17 @@ TEST(FloatSubOpModel, WithBroadcast) { TEST(QuantizedSubOpModel, QuantizedTestsNoActivation) { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::vector> inputs1 = { - {0.1, 0.2, 0.3, 0.4}, {-0.2, 0.2, 0.4, 0.7}, {-0.01, 0.2, 0.7, 0.3}}; + {0.1, 0.2, 0.3, 0.4}, + {-0.2, 0.2, 0.4, 0.7}, + {-0.01, 0.2, 0.7, 0.3}}; std::vector> inputs2 = { - {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.2}, {0.6, 0.4, -0.18, 0.5}}; + {0.6, 0.4, 0.3, 0.1}, + {0.6, 0.4, 0.5, -0.2}, + {0.6, 0.4, -0.18, 0.5}}; std::vector> results = { - {-0.5, -0.2, 0.0, 0.3}, {-0.8, -0.2, -0.1, 0.9}, {-0.61, -0.2, 0.88, -0.2}}; + {-0.5, -0.2, 0.0, 0.3}, + {-0.8, -0.2, -0.1, 0.9}, + {-0.61, -0.2, 0.88, -0.2}}; for (int i = 0; i < inputs1.size(); ++i) { QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, @@ -179,7 +185,8 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapes) { m.QuantizeAndPopulate(m.input2(), {0.1, 0.3, 0.3, 0.5, 1.1, 0.1}); m.Invoke(); EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({-2.1, -0.1, 0.4, 0.3, 0.0, 1.9}, + 
ElementsAreArray(ArrayFloatNear({-2.1, -0.1, 0.4, + 0.3, 0.0, 1.9}, kQuantizedTolerance))) << "With shape number " << i; } @@ -198,7 +205,8 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcast) { m.QuantizeAndPopulate(m.input2(), {0.7}); m.Invoke(); EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({-2.7, -0.5, 0.0, 0.1, 0.4, 1.3}, + ElementsAreArray(ArrayFloatNear({-2.7, -0.5, 0.0, + 0.1, 0.4, 1.3}, kQuantizedTolerance))) << "With shape number " << i; } -- GitLab From 62a05fe71ba5157e7abeb291f4b8b6ac7abf97fb Mon Sep 17 00:00:00 2001 From: DavidNorman Date: Tue, 27 Feb 2018 11:51:05 +0000 Subject: [PATCH 0017/1262] Ensure that the backend_deps is a non-frozen object --- tensorflow/compiler/xla/tests/build_defs.bzl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/build_defs.bzl b/tensorflow/compiler/xla/tests/build_defs.bzl index 610302ac12..eac2eb286c 100644 --- a/tensorflow/compiler/xla/tests/build_defs.bzl +++ b/tensorflow/compiler/xla/tests/build_defs.bzl @@ -137,7 +137,8 @@ def xla_test(name, backend_deps += ["//tensorflow/compiler/xla/tests:test_macros_gpu"] this_backend_tags += ["requires-gpu-sm35"] elif backend in plugins: - backend_deps = plugins[backend]["deps"] + backend_deps = [] + backend_deps += plugins[backend]["deps"] this_backend_copts += plugins[backend]["copts"] this_backend_tags += plugins[backend]["tags"] this_backend_args += plugins[backend]["args"] -- GitLab From 2e98952221bfe83fadc3054e66b2ff3c23c44a24 Mon Sep 17 00:00:00 2001 From: DavidNorman Date: Tue, 27 Feb 2018 13:52:13 +0000 Subject: [PATCH 0018/1262] Allow the large R1 slice tests to be disabled --- tensorflow/compiler/xla/tests/slice_test.cc | 35 +++++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/tests/slice_test.cc b/tensorflow/compiler/xla/tests/slice_test.cc index fe36df160d..50cd56d2d4 100644 --- a/tensorflow/compiler/xla/tests/slice_test.cc +++ 
b/tensorflow/compiler/xla/tests/slice_test.cc @@ -211,6 +211,9 @@ class SliceR1Test : public ClientLibraryTestBase, } }; +// A version of SliceR1Test used to label and disable 'large' tests +class SliceR1LargeTest : public SliceR1Test {}; + string SliceR1TestDataToString(const ::testing::TestParamInfo& data) { const R1Spec& spec = data.param; return ::tensorflow::strings::Printf("%lld_%lld_%lld_%lld", spec.input_dim0, @@ -230,6 +233,18 @@ XLA_TEST_P(SliceR1Test, DoIt_U64) { Run(GetParam()); } XLA_TEST_P(SliceR1Test, DoIt_S64) { Run(GetParam()); } +XLA_TEST_P(SliceR1LargeTest, DoIt_F32) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_F64) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_U32) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_S32) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_U64) { Run(GetParam()); } + +XLA_TEST_P(SliceR1LargeTest, DoIt_S64) { Run(GetParam()); } + // Tests for R1 slice ops. // The format for each testcase is {input size, start, limit, stride}. // clang-format off @@ -237,12 +252,6 @@ INSTANTIATE_TEST_CASE_P( SliceR1TestInstantiation, SliceR1Test, ::testing::Values( -// TODO(b/69425338): This uses too much memory on GPU. -#ifndef XLA_TEST_BACKEND_GPU - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1}, - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1}, - R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1}, -#endif R1Spec{10, 0, 0, 1}, R1Spec{10, 7, 7, 1}, R1Spec{10, 0, 5, 1}, @@ -278,6 +287,20 @@ INSTANTIATE_TEST_CASE_P( SliceR1TestDataToString ); +// TODO(b/69425338): This uses too much memory on GPU. 
+#ifndef XLA_TEST_BACKEND_GPU +INSTANTIATE_TEST_CASE_P( + SliceR1TestBigSlicesInstantiation, + SliceR1LargeTest, + ::testing::Values( + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024, 12 * 1024 * 1024, 1}, + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 + 1, 12 * 1024 * 1024 - 1, 1}, + R1Spec{16 * 1024 * 1024, 4 * 1024 * 1024 - 1, 12 * 1024 * 1024 + 1, 1} + ), + SliceR1TestDataToString +); +#endif + INSTANTIATE_TEST_CASE_P( SliceStridedR1TestInstantiation, SliceR1Test, -- GitLab From 0489bf25930ea0dc4b7d8ffc792b0390bfbc06bc Mon Sep 17 00:00:00 2001 From: Jingwen Date: Tue, 27 Feb 2018 18:30:09 -0500 Subject: [PATCH 0019/1262] Include cstring in logging.cc for use of strrchr() --- tensorflow/core/platform/default/logging.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc index 2b874da198..c6e5777c26 100644 --- a/tensorflow/core/platform/default/logging.cc +++ b/tensorflow/core/platform/default/logging.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include #include #include +#include #endif #include -- GitLab From ef4e8ad826c8946f8ff3e0f7e1b3bb3bec61010c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 21 Feb 2018 15:06:04 +0800 Subject: [PATCH 0020/1262] CLN: extract ApplyAdamBaseOp --- tensorflow/core/kernels/training_ops.cc | 146 +++++++++++++++--- tensorflow/core/kernels/training_ops.h | 13 ++ .../core/kernels/training_ops_gpu.cu.cc | 30 ++++ tensorflow/core/ops/training_ops.cc | 37 +++++ 4 files changed, 202 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 233aa03c32..7d383d980a 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -328,6 +328,45 @@ struct ApplyAdamSYCL { template struct ApplyAdam : ApplyAdamNonCuda {}; +template +struct ApplyAdaMaxNonCuda { + void operator()(const Device& d, typename TTypes::Flat var, + typename TTypes::Flat m, typename TTypes::Flat v, + typename TTypes::ConstScalar beta1_power, + typename TTypes::ConstScalar beta2_power, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar beta1, + typename TTypes::ConstScalar beta2, + typename TTypes::ConstScalar epsilon, + typename TTypes::ConstFlat grad, bool use_nesterov) { + if (use_nesterov) { + LOG(WARNING) << "AdaMax doesn't support use_nesterov yet, ignore it."; + } + m.device(d) += (grad - m) * (T(1) - beta1()); + // v == u + v.device(d) = (beta2() * v).cwiseMax(grad.abs()); + // var == θ + var.device(d) -= (lr * m) / ((T(1) - beta1_power()) * v); + } +}; + +#ifdef TENSORFLOW_USE_SYCL +template +struct ApplyAdaMaxSYCL { + void operator()(const SYCLDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, typename TTypes::Flat v, + T beta1_power, T beta2_power, T lr, T beta1, T beta2, + T epsilon, typename TTypes::ConstFlat grad) { + m.device(d) += (grad - m) * (T(1) - beta1); + v.device(d) = (beta2 * v).cwiseMax(grad.abs()); 
+ var.device(d) -= (lr * m) / ((T(1) - beta1_power) * v); + } +}; +#endif // TENSORFLOW_USE_SYCL + +template +struct ApplyAdaMax : ApplyAdaMaxNonCuda {}; + template struct ApplyRMSProp { void operator()(const CPUDevice& d, typename TTypes::Flat var, @@ -2477,10 +2516,12 @@ TF_CALL_double(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS #undef REGISTER_KERNELS -template -class ApplyAdamOp : public OpKernel { +template + class Functor> +class ApplyAdamBaseOp : public OpKernel { public: - explicit ApplyAdamOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + explicit ApplyAdamBaseOp(OpKernelConstruction* ctx) : OpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("use_nesterov", &use_nesterov_)); } @@ -2553,11 +2594,11 @@ class ApplyAdamOp : public OpKernel { grad.shape().DebugString())); const Device& device = ctx->template eigen_device(); - functor::ApplyAdam()( - device, var.flat(), m.flat(), v.flat(), - beta1_power.scalar(), beta2_power.scalar(), lr.scalar(), - beta1.scalar(), beta2.scalar(), epsilon.scalar(), - grad.flat(), use_nesterov_); + auto functor = Functor(); + functor(device, var.flat(), m.flat(), v.flat(), + beta1_power.scalar(), beta2_power.scalar(), lr.scalar(), + beta1.scalar(), beta2.scalar(), epsilon.scalar(), + grad.flat(), use_nesterov_); MaybeForwardRefInputToRefOutput(ctx, 0, 0); } @@ -2568,10 +2609,11 @@ class ApplyAdamOp : public OpKernel { }; #ifdef TENSORFLOW_USE_SYCL -template -class ApplyAdamOp : public OpKernel { +template class Functor> +class ApplyAdamBaseOp : public OpKernel { public: - explicit ApplyAdamOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + explicit ApplyAdamBaseOp(OpKernelConstruction* ctx) : OpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); } @@ -2672,9 +2714,10 @@ class ApplyAdamOp : public OpKernel { var.shape().DebugString(), " ", grad.shape().DebugString())); - functor::ApplyAdamSYCL()(device, var.flat(), 
m.flat(), v.flat(), - beta1_power, beta2_power, lr, beta1, beta2, - epsilon, grad.flat()); + auto functor = Functor(); + functor(device, var.flat(), m.flat(), v.flat(), + beta1_power, beta2_power, lr, beta1, beta2, + epsilon, grad.flat()); MaybeForwardRefInputToRefOutput(ctx, 0, 0); } @@ -2684,28 +2727,28 @@ class ApplyAdamOp : public OpKernel { }; #endif // TENSORFLOW_USE_SYCL -#define REGISTER_KERNELS(D, T) \ +#define REGISTER_KERNELS(D, T, F) \ REGISTER_KERNEL_BUILDER( \ Name("ApplyAdam").Device(DEVICE_##D).TypeConstraint("T"), \ - ApplyAdamOp); \ + ApplyAdamBaseOp); \ REGISTER_KERNEL_BUILDER(Name("ResourceApplyAdam") \ .HostMemory("var") \ .HostMemory("m") \ .HostMemory("v") \ .Device(DEVICE_##D) \ .TypeConstraint("T"), \ - ApplyAdamOp); -#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); - + ApplyAdamBaseOp); +#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T, functor::ApplyAdam); TF_CALL_half(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); +#undef REGISTER_CPU_KERNELS #ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T); - +#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T, functor::ApplyAdamSYCL); TF_CALL_float(REGISTER_SYCL_KERNELS); TF_CALL_double(REGISTER_SYCL_KERNELS); +#undef REGISTER_SYCL_KERNELS #endif #if GOOGLE_CUDA @@ -2730,11 +2773,66 @@ DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // namespace functor -REGISTER_KERNELS(GPU, Eigen::half); -REGISTER_KERNELS(GPU, float); -REGISTER_KERNELS(GPU, double); +#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T, functor::ApplyAdam); +REGISTER_GPU_KERNELS(Eigen::half); +REGISTER_GPU_KERNELS(float); +REGISTER_GPU_KERNELS(double); +#undef REGISTER_GPU_KERNELS #endif +#undef REGISTER_KERNELS + +#define REGISTER_KERNELS(D, T, F) \ + REGISTER_KERNEL_BUILDER( \ + Name("ApplyAdaMax").Device(DEVICE_##D).TypeConstraint("T"), \ + ApplyAdamBaseOp); \ + REGISTER_KERNEL_BUILDER(Name("ResourceApplyAdaMax") 
\ + .HostMemory("var") \ + .HostMemory("m") \ + .HostMemory("v") \ + .Device(DEVICE_##D) \ + .TypeConstraint("T"), \ + ApplyAdamBaseOp); +#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T, functor::ApplyAdaMax); +TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_float(REGISTER_CPU_KERNELS); +TF_CALL_double(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS + +#ifdef TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T, functor::ApplyAdaMaxSYCL); +TF_CALL_float(REGISTER_SYCL_KERNELS); +TF_CALL_double(REGISTER_SYCL_KERNELS); +#undef REGISTER_SYCL_KERNELS +#endif + +#if GOOGLE_CUDA +// Forward declarations of the functor specializations for GPU. +namespace functor { +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void ApplyAdaMax::operator()( \ + const GPUDevice& d, typename TTypes::Flat var, \ + typename TTypes::Flat m, typename TTypes::Flat v, \ + typename TTypes::ConstScalar beta1_power, \ + typename TTypes::ConstScalar beta2_power, \ + typename TTypes::ConstScalar lr, \ + typename TTypes::ConstScalar beta1, \ + typename TTypes::ConstScalar beta2, \ + typename TTypes::ConstScalar epsilon, \ + typename TTypes::ConstFlat grad, bool use_nesterov); \ + extern template struct ApplyAdaMax; +DECLARE_GPU_SPEC(Eigen::half); +DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(double); +#undef DECLARE_GPU_SPEC +} // namespace functor + +#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T, functor::ApplyAdaMax); +REGISTER_GPU_KERNELS(Eigen::half); +REGISTER_GPU_KERNELS(float); +REGISTER_GPU_KERNELS(double); +#undef REGISTER_GPU_KERNELS +#endif #undef REGISTER_KERNELS template diff --git a/tensorflow/core/kernels/training_ops.h b/tensorflow/core/kernels/training_ops.h index 7ee956053a..46a5290210 100644 --- a/tensorflow/core/kernels/training_ops.h +++ b/tensorflow/core/kernels/training_ops.h @@ -139,6 +139,19 @@ struct ApplyAdam { typename TTypes::ConstFlat grad, bool use_nesterov); }; +template +struct ApplyAdaMax { + void operator()(const Device& d, 
typename TTypes::Flat var, + typename TTypes::Flat m, typename TTypes::Flat v, + typename TTypes::ConstScalar beta1_power, + typename TTypes::ConstScalar beta2_power, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar beta1, + typename TTypes::ConstScalar beta2, + typename TTypes::ConstScalar epsilon, + typename TTypes::ConstFlat grad, bool use_nesterov); +}; + template struct ApplyRMSProp { void operator()(const Device& d, typename TTypes::Flat var, diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 0376a3b2c6..1776c108ab 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -142,6 +142,32 @@ struct ApplyAdam { } }; +template +struct ApplyAdaMax { + void operator()(const GPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, typename TTypes::Flat v, + typename TTypes::ConstScalar beta1_power, + typename TTypes::ConstScalar beta2_power, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar beta1, + typename TTypes::ConstScalar beta2, + typename TTypes::ConstScalar epsilon, + typename TTypes::ConstFlat grad, bool use_nesterov) { + Eigen::array::Tensor::Index, 1> bcast; + bcast[0] = grad.dimension(0); + Eigen::Sizes<1> single; + const auto one = static_cast(1.0); + m.device(d) = + m + (beta1.constant(one) - beta1).reshape(single).broadcast(bcast) * + (grad - m); + v.device(d) = + (beta2.reshape(single).broadcast(bcast) * v).cwiseMax(grad.abs()); + var.device(d) -= + (lr * m) / ((beta1_power.constant(one) - + beta1_power).reshape(single).broadcast(bcast) * v); + } +}; + template struct ApplyRMSProp { void operator()(const GPUDevice& d, typename TTypes::Flat var, @@ -278,6 +304,10 @@ template struct functor::ApplyAdam; template struct functor::ApplyAdam; template struct functor::ApplyAdam; +template struct functor::ApplyAdaMax; +template struct functor::ApplyAdaMax; +template struct functor::ApplyAdaMax; + 
template struct functor::ApplyRMSProp; template struct functor::ApplyRMSProp; template struct functor::ApplyRMSProp; diff --git a/tensorflow/core/ops/training_ops.cc b/tensorflow/core/ops/training_ops.cc index 6ce9595fb6..6f107db3ea 100644 --- a/tensorflow/core/ops/training_ops.cc +++ b/tensorflow/core/ops/training_ops.cc @@ -737,6 +737,43 @@ REGISTER_OP("ResourceApplyAdam") return ApplyAdamShapeFn(c, false /* sparse */); }); +REGISTER_OP("ApplyAdaMax") + .Input("var: Ref(T)") + .Input("m: Ref(T)") + .Input("v: Ref(T)") + .Input("beta1_power: T") + .Input("beta2_power: T") + .Input("lr: T") + .Input("beta1: T") + .Input("beta2: T") + .Input("epsilon: T") + .Input("grad: T") + .Output("out: Ref(T)") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .Attr("use_nesterov: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyAdamShapeFn(c, false /* sparse */); + }); + +REGISTER_OP("ResourceApplyAdaMax") + .Input("var: resource") + .Input("m: resource") + .Input("v: resource") + .Input("beta1_power: T") + .Input("beta2_power: T") + .Input("lr: T") + .Input("beta1: T") + .Input("beta2: T") + .Input("epsilon: T") + .Input("grad: T") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .Attr("use_nesterov: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyAdamShapeFn(c, false /* sparse */); + }); + static Status ApplyRMSPropShapeFn(InferenceContext* c, bool sparse) { ShapeHandle unused; ShapeHandle s = ShapeOrHandleShape(c, 0); // var -- GitLab From 4d31dac8111b963ed427969c71c6957c929d3e5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 21 Feb 2018 20:29:46 +0800 Subject: [PATCH 0021/1262] ENH: add AdaMaxOptimizer in python side --- tensorflow/contrib/opt/BUILD | 20 +++ tensorflow/contrib/opt/__init__.py | 2 + .../contrib/opt/python/training/adamax.py | 72 ++++++++++ .../opt/python/training/adamax_test.py | 124 ++++++++++++++++++ 
tensorflow/core/kernels/training_ops.cc | 2 +- 5 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 tensorflow/contrib/opt/python/training/adamax.py create mode 100644 tensorflow/contrib/opt/python/training/adamax_test.py diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 86ceda71b7..a86d150f7a 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -14,6 +14,7 @@ py_library( name = "opt_py", srcs = [ "__init__.py", + "python/training/adamax.py", "python/training/addsign.py", "python/training/drop_stale_gradient_optimizer.py", "python/training/elastic_average_optimizer.py", @@ -48,6 +49,25 @@ py_library( ], ) +py_test( + name = "adamax_test", + srcs = ["python/training/adamax_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_oss", # b/73507407 + "notsan", # b/31055119 + ], + deps = [ + ":opt_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:training", + "//third_party/py/numpy", + ], +) + py_test( name = "external_optimizer_test", srcs = ["python/training/external_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 6c1bb1adc0..4c13c8e247 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=wildcard-import +from tensorflow.contrib.opt.python.training.adamax import * from tensorflow.contrib.opt.python.training.addsign import * from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import * from tensorflow.contrib.opt.python.training.external_optimizer import * @@ -36,6 +37,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ + 'AdaMaxOptimizer', 'PowerSignOptimizer', 'AddSignOptimizer', 
'DelayCompensatedGradientDescentOptimizer', diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py new file mode 100644 index 0000000000..4e0c541d3a --- /dev/null +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -0,0 +1,72 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""AdaMax for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.training import optimizer +from tensorflow.python.training import adam +from tensorflow.python.training import training_ops +from tensorflow.python.util.tf_export import tf_export + + +@tf_export("train.AdaMaxOptimizer") +class AdaMaxOptimizer(adam.AdamOptimizer): + """Optimizer that implements the AdaMax algorithm. + + See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) + ([pdf](http://arxiv.org/pdf/1412.6980.pdf)). 
+ """ + + def _apply_dense(self, grad, var): + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + beta1_power, beta2_power = self._get_beta_accumulators() + return training_ops.apply_ada_max( + var, m, v, + math_ops.cast(beta1_power, var.dtype.base_dtype), + math_ops.cast(beta2_power, var.dtype.base_dtype), + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._beta1_t, var.dtype.base_dtype), + math_ops.cast(self._beta2_t, var.dtype.base_dtype), + math_ops.cast(self._epsilon_t, var.dtype.base_dtype), + grad, use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var): + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + beta1_power, beta2_power = self._get_beta_accumulators() + return training_ops.resource_apply_ada_max( + var.handle, m.handle, v.handle, + math_ops.cast(beta1_power, grad.dtype.base_dtype), + math_ops.cast(beta2_power, grad.dtype.base_dtype), + math_ops.cast(self._lr_t, grad.dtype.base_dtype), + math_ops.cast(self._beta1_t, grad.dtype.base_dtype), + math_ops.cast(self._beta2_t, grad.dtype.base_dtype), + math_ops.cast(self._epsilon_t, grad.dtype.base_dtype), + grad, use_locking=self._use_locking) + + def _apply_sparse_shared(self, grad, var, indices, scatter_add): + raise NotImplementedError() + + def _apply_sparse(self, grad, var): + raise NotImplementedError() diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py new file mode 100644 index 0000000000..a1499118dd --- /dev/null +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -0,0 +1,124 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for AdaMax.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.opt.python.training import adamax +from tensorflow.python.client import session +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def adamax_update_numpy(param, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): + m_t = beta1 * m + (1 - beta1) * g_t + v_t = np.maximum(beta2 * v, np.abs(g_t)) + param_t = param - (alpha / (1 - beta1**t)) * m_t / v_t + return param_t, m_t, v_t + + +class AdaMaxOptimizerTest(test.TestCase): + + def doTestBasic(self, use_resource=False): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + with self.test_session(graph=ops.Graph()): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + var0_np, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + var1_np, name="var1_%d" % i) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = adamax.AdaMaxOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + opt_variables = opt.variables() + beta1_power, beta2_power = opt._get_beta_accumulators() + self.assertTrue(beta1_power is not None) + self.assertTrue(beta2_power is not None) + self.assertIn(beta1_power, opt_variables) + self.assertIn(beta2_power, opt_variables) + + with ops.Graph().as_default(): + # Shouldn't return non-slot variables from other graphs. 
+ self.assertEqual(0, len(opt.variables())) + + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + if context.in_graph_mode(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + self.assertAllCloseAccordingToType(0.9**(t + 1), + self.evaluate(beta1_power)) + self.assertAllCloseAccordingToType(0.999**(t + 1), + self.evaluate(beta2_power)) + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + if use_resource: + self.assertEqual("var0_%d/Adam:0" % (i,), + opt.get_slot(var=var0, name="m").name) + + def testBasic(self): + with self.test_session(): + self.doTestBasic(use_resource=False) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 7d383d980a..b3b53d9ee0 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -346,7 +346,7 @@ struct ApplyAdaMaxNonCuda { // v == u v.device(d) = (beta2() * v).cwiseMax(grad.abs()); // var == θ - var.device(d) -= (lr * m) / ((T(1) - beta1_power()) * v); + var.device(d) -= (lr() * m) / ((T(1) - beta1_power()) * v); } }; -- GitLab From ba258d530f1af5fbcc8c1b72637dc7b2177a48c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 2 Mar 2018 19:33:30 +0800 Subject: [PATCH 0022/1262] ENH: support sparse grad --- .../contrib/opt/python/training/adamax.py | 51
+++++++++++++++++-- .../opt/python/training/adamax_test.py | 2 +- tensorflow/core/kernels/training_ops.cc | 4 +- .../core/kernels/training_ops_gpu.cu.cc | 5 +- 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index 4e0c541d3a..137fce769f 100644 --- a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -18,12 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.eager import context from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.training import optimizer +from tensorflow.python.ops import state_ops from tensorflow.python.training import adam from tensorflow.python.training import training_ops from tensorflow.python.util.tf_export import tf_export @@ -65,8 +65,49 @@ class AdaMaxOptimizer(adam.AdamOptimizer): math_ops.cast(self._epsilon_t, grad.dtype.base_dtype), grad, use_locking=self._use_locking) - def _apply_sparse_shared(self, grad, var, indices, scatter_add): - raise NotImplementedError() + def _apply_sparse_shared(self, grad, var, indices, + scatter_add, scatter_update): + beta1_power, beta2_power = self._get_beta_accumulators() + beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype) + beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype) + lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) + beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) + beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) + epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, "m") + m_slice = 
array_ops.gather(m, indices) + m_t_slice = m_slice * beta1_t + grad * (1 - beta1_t) + with ops.control_dependencies([m_t_slice]): + m_t = scatter_update(m, indices, m_t_slice) + # u_t = max(beta2 * u, abs(g_t)) + v = self.get_slot(var, "v") + v_slice = array_ops.gather(v, indices) + v_t_slice = math_ops.maximum(v_slice * beta2_t, math_ops.abs(grad)) + with ops.control_dependencies([v_t_slice]): + v_t = scatter_update(v, indices, v_t_slice) + # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t + var_slice = -lr_t / (1 - beta1_power) * (m_t_slice / + (v_t_slice + epsilon_t)) + with ops.control_dependencies([var_slice]): + var_update = scatter_add(var, indices, var_slice) + return control_flow_ops.group(*[var_update, m_t, v_t]) def _apply_sparse(self, grad, var): - raise NotImplementedError() + return self._apply_sparse_shared( + grad.values, var, grad.indices, + lambda x, i, v: state_ops.scatter_add( # pylint: disable=g-long-lambda + x, i, v, use_locking=self._use_locking), + lambda x, i, v: state_ops.scatter_update( # pylint: disable=g-long-lambda + x, i, v, use_locking=self._use_locking)) + + def _resource_scatter_update(self, x, i, v): + with ops.control_dependencies( + [resource_variable_ops.resource_scatter_update( + x.handle, i, v)]): + return x.value() + + def _resource_apply_sparse(self, grad, var, indices): + return self._apply_sparse_shared( + grad, var, indices, + self._resource_scatter_add, self._resource_scatter_update) diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index a1499118dd..0e2ba0987a 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -45,7 +45,7 @@ def adamax_update_numpy(param, epsilon=1e-8): m_t = beta1 * m + (1 - beta1) * g_t v_t = np.maximum(beta2 * v, np.abs(g_t)) - param_t = param - (alpha / (1 - beta1**t)) * m_t / v_t + param_t = param - (alpha / (1 - beta1**t)) * m_t / (v_t + epsilon) 
return param_t, m_t, v_t diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index b3b53d9ee0..0387e3011e 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -346,7 +346,7 @@ struct ApplyAdaMaxNonCuda { // v == u v.device(d) = (beta2() * v).cwiseMax(grad.abs()); // var == θ - var.device(d) -= (lr() * m) / ((T(1) - beta1_power()) * v); + var.device(d) -= lr() / (T(1) - beta1_power()) * (m / (v + epsilon())); } }; @@ -359,7 +359,7 @@ struct ApplyAdaMaxSYCL { T epsilon, typename TTypes::ConstFlat grad) { m.device(d) += (grad - m) * (T(1) - beta1); v.device(d) = (beta2 * v).cwiseMax(grad.abs()); - var.device(d) -= (lr * m) / ((T(1) - beta1_power) * v); + var.device(d) -= lr / (T(1) - beta1_power) * (m / (v + epsilon)); } }; #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 1776c108ab..54c06b130c 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -163,8 +163,9 @@ struct ApplyAdaMax { v.device(d) = (beta2.reshape(single).broadcast(bcast) * v).cwiseMax(grad.abs()); var.device(d) -= - (lr * m) / ((beta1_power.constant(one) - - beta1_power).reshape(single).broadcast(bcast) * v); + lr / (beta1_power.constant(one) - + beta1_power).reshape(single).broadcast(bcast) * + (m / (v + epsilon)); } }; -- GitLab From f6f5a6019970bb8d667819da7d6316a8088a0b78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 3 Mar 2018 10:02:43 +0800 Subject: [PATCH 0023/1262] DOC: add docment --- .../contrib/opt/python/training/adamax.py | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index 137fce769f..ddae06bec7 100644 --- 
a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -29,7 +29,6 @@ from tensorflow.python.training import training_ops from tensorflow.python.util.tf_export import tf_export -@tf_export("train.AdaMaxOptimizer") class AdaMaxOptimizer(adam.AdamOptimizer): """Optimizer that implements the AdaMax algorithm. @@ -37,6 +36,56 @@ class AdaMaxOptimizer(adam.AdamOptimizer): ([pdf](http://arxiv.org/pdf/1412.6980.pdf)). """ + def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, + use_locking=False, name="AdaMax"): + """Construct a new AdaMax optimizer. + + Initialization: + + ``` + m_0 <- 0 (Initialize initial 1st moment vector) + v_0 <- 0 (Initialize the exponentially weighted infinity norm) + t <- 0 (Initialize timestep) + ``` + + The update rule for `variable` with gradient `g` uses an optimization + described at the end of section 7.1 of the paper: + + ``` + t <- t + 1 + lr_t <- learning_rate / (1 - beta1^t) + + m_t <- beta1 * m_{t-1} + (1 - beta1) * g + v_t <- max(beta2 * v_{t-1}, abs(g)) + variable <- variable - lr_t * m_t / (v_t + epsilon) + ``` + + Similar to AdamOptimizer, the epsilon is added for numerical stability + (especially to get rid of division by zero when v_t = 0). + + In contrast to AdamOptimizer, the sparse implementation of this algorithm + (used when the gradient is an IndexedSlices object, typically because of + `tf.gather` or an embedding lookup in the forward pass) only updates + variable slices and corresponding `m_t`, `v_t` terms when that part of + the variable was used in the forward pass. This means that the sparse + behavior differs from the dense behavior (similar to some momentum + implementations which ignore momentum unless a variable slice was actually + used). + + Args: + learning_rate: A Tensor or a floating point value. The learning rate. + beta1: A float value or a constant float tensor.
+ The exponential decay rate for the 1st moment estimates. + beta2: A float value or a constant float tensor. + The exponential decay rate for the exponentially weighted infinity norm. + epsilon: A small constant for numerical stability. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients. + Defaults to "AdaMax". + """ + super(AdaMaxOptimizer, self).__init__(learning_rate, beta1, beta2, + epsilon, use_locking, name) + def _apply_dense(self, grad, var): m = self.get_slot(var, "m") v = self.get_slot(var, "v") -- GitLab From f750e21a63c8836b9e7243ce786af2de3f65cc3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 3 Mar 2018 12:31:54 +0800 Subject: [PATCH 0024/1262] TST: add more tests --- .../contrib/opt/python/training/adamax.py | 2 +- .../opt/python/training/adamax_test.py | 243 +++++++++++++++++- 2 files changed, 233 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index ddae06bec7..36d49d4cbf 100644 --- a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -159,4 +159,4 @@ class AdaMaxOptimizer(adam.AdamOptimizer): def _resource_apply_sparse(self, grad, var, indices): return self._apply_sparse_shared( grad, var, indices, - self._resource_scatter_add, self._resource_scatter_update) + self._resource_scatter_add, self._resource_scatter_update) diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index 0e2ba0987a..e91e5cb96a 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -35,22 +35,142 @@ from tensorflow.python.platform import test def adamax_update_numpy(param, - g_t, - t, - m, - v, - alpha=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-8): 
+ g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): m_t = beta1 * m + (1 - beta1) * g_t v_t = np.maximum(beta2 * v, np.abs(g_t)) - param_t = param - (alpha / (1 - beta1**t)) * m_t / (v_t + epsilon) + param_t = param - (alpha / (1 - beta1**t)) * (m_t / (v_t + epsilon)) + return param_t, m_t, v_t + + +def adamax_sparse_update_numpy(param, + indices, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): + m_t, v_t, param_t = np.copy(m), np.copy(v), np.copy(param) + m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t + v_t_slice = np.maximum(beta2 * v[indices], np.abs(g_t)) + param_t_slice = param[indices] - ((alpha / (1 - beta1**t)) * + (m_t_slice / (v_t_slice + epsilon))) + m_t[indices] = m_t_slice + v_t[indices] = v_t_slice + param_t[indices] = param_t_slice return param_t, m_t, v_t class AdaMaxOptimizerTest(test.TestCase): + def doTestSparse(self, use_resource=False): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. 
+ zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype) + m0, v0, m1, v1 = zero_slots(), zero_slots(), zero_slots(), zero_slots() + var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([2])) + grads1_np_indices = np.array([2, 1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([2])) + opt = adamax.AdaMaxOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0, 3.0], var0.eval()) + self.assertAllClose([4.0, 5.0, 6.0], var1.eval()) + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) + update.run() + + var0_np, m0, v0 = adamax_sparse_update_numpy( + var0_np, grads0_np_indices, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_sparse_update_numpy( + var1_np, grads1_np_indices, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSparse(self): + self.doTestSparse(use_resource=False) + + def 
testResourceSparse(self): + self.doTestSparse(use_resource=True) + + def testSparseDevicePlacement(self): + for index_dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(force_gpu=test.is_gpu_available()): + # If a GPU is available, tests that all optimizer ops can be placed on + # it (i.e. they have GPU kernels). + var = variables.Variable([[1.0], [2.0]]) + indices = constant_op.constant([0, 1], dtype=index_dtype) + gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices)) + optimizer = adamax.AdaMaxOptimizer(3.0) + minimize_op = optimizer.minimize(gathered_sum) + variables.global_variables_initializer().run() + minimize_op.run() + + def testSparseRepeatedIndices(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + repeated_index_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + aggregated_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + grad_repeated_index = ops.IndexedSlices( + constant_op.constant( + [0.1, 0.1], shape=[2, 1], dtype=dtype), + constant_op.constant([1, 1]), + constant_op.constant([2, 1])) + grad_aggregated = ops.IndexedSlices( + constant_op.constant( + [0.2], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), + constant_op.constant([2, 1])) + repeated_update = adamax.AdaMaxOptimizer().apply_gradients( + [(grad_repeated_index, repeated_index_update_var)]) + aggregated_update = adamax.AdaMaxOptimizer().apply_gradients( + [(grad_aggregated, aggregated_update_var)]) + variables.global_variables_initializer().run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + def doTestBasic(self, use_resource=False): for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): with self.test_session(graph=ops.Graph()): @@ -93,7 +213,7 @@ class 
AdaMaxOptimizerTest(test.TestCase): beta1_power, beta2_power = opt._get_beta_accumulators() - # Run 3 steps of Adam + # Run 3 steps of AdaMax for t in range(1, 4): if context.in_graph_mode(): self.evaluate(update) @@ -112,13 +232,114 @@ class AdaMaxOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) if use_resource: - self.assertEqual("var0_%d/Adam:0" % (i,), + self.assertEqual("var0_%d/AdaMax:0" % (i,), opt.get_slot(var=var0, name="m").name) def testBasic(self): with self.test_session(): self.doTestBasic(use_resource=False) + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTestBasic(use_resource=True) + + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adamax.AdaMaxOptimizer(constant_op.constant(0.001)) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) + update.run() + + var0_np, m0, v0 = 
adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSharing(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adamax.AdaMaxOptimizer() + update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 3 steps of intertwined AdaMax1 and AdaMax2. 
+ for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) + if t % 2 == 0: + update1.run() + else: + update2.run() + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testTwoSessions(self): + optimizer = adamax.AdaMaxOptimizer() + g = ops.Graph() + with g.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + optimizer.apply_gradients([(grads0, var0)]) + + gg = ops.Graph() + with gg.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + + # If the optimizer saves any state not keyed by graph the following line + # fails. 
+ optimizer.apply_gradients([(grads0, var0)]) + if __name__ == "__main__": test.main() -- GitLab From 8b5e4ad404ba16919ad4f17a763ee5383d61a400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 3 Mar 2018 17:39:56 +0800 Subject: [PATCH 0025/1262] DOC: add apidef --- .../contrib/opt/python/training/adamax.py | 3 +- .../base_api/api_def_ApplyAdaMax.pbtxt | 89 +++++++++++++++++++ .../api_def_ResourceApplyAdaMax.pbtxt | 83 +++++++++++++++++ 3 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceApplyAdaMax.pbtxt diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index 36d49d4cbf..fe5522a170 100644 --- a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -53,11 +53,10 @@ class AdaMaxOptimizer(adam.AdamOptimizer): ``` t <- t + 1 - lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) m_t <- beta1 * m_{t-1} + (1 - beta1) * g v_t <- max(beta2 * v_{t-1}, abs(g)) - variable <- variable - lr_t / (1 - beta1^t) * m_t / (v_t + epsilon) + variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon) ``` Similar to AdamOptimizer, the epsilon is added for numerical stability diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt new file mode 100644 index 0000000000..106c30ca83 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt @@ -0,0 +1,89 @@ +op { + graph_op_name: "ApplyAdaMax" + in_arg { + name: "var" + description: < Date: Mon, 5 Mar 2018 17:41:00 +0000 Subject: [PATCH 0026/1262] Update the documentation of `softmax_cross_entropy` This fix updates the documentation of `softmax_cross_entropy`, and removed the shape restrictions of `onehot_labels` and 
`logits`. They only needs to be of the same shape, not necessary `[batch_size, num_classes]`. This fix fixes 16263. Signed-off-by: Yong Tang --- tensorflow/python/ops/losses/losses_impl.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 7386976e93..04c13cb6c6 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -710,11 +710,16 @@ def softmax_cross_entropy( new_onehot_labels = onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes + Note that `onehot_labels` and `logits` must have the same shape, + e.g. `[batch_size, num_classes]`. The shape of `weights` must be + broadcastable to loss, whose shape is decided by the shape of `logits`. + In case the shape of `logits` is `[batch_size, num_classes]`, loss is + a `Tensor` of shape `[batch_size]`. + Args: - onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels. - logits: `[batch_size, num_classes]` logits outputs of the network . - weights: Optional `Tensor` whose rank is either 0, or rank 1 and is - broadcastable to the loss which is a `Tensor` of shape `[batch_size]`. + onehot_labels: One-hot-encoded labels. + logits: Logits outputs of the network. + weights: Optional `Tensor` that is broadcastable to loss. label_smoothing: If greater than 0 then smooth the labels. scope: the scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. -- GitLab From 82e34cd19f554509113d438ca98ad76e42fdf4e9 Mon Sep 17 00:00:00 2001 From: Hovhannes Harutyunyan Date: Wed, 7 Mar 2018 09:14:53 +0400 Subject: [PATCH 0027/1262] Remove quantized versiaon of Div till fixing it. 
--- .../internal/optimized/optimized_ops.h | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index b19f46beaa..9c181fddad 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1973,53 +1973,6 @@ void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, } } -inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, - int32 input1_offset, const uint8* input2_data, - const Dims<4>& input2_dims, int32 input2_offset, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - gemmlowp::ScopedProfilingLabel label("BroadcastDiv/8bit"); - - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- for (int b = 0; b < ArraySize(output_dims, 3); ++b) { - for (int y = 0; y < ArraySize(output_dims, 2); ++y) { - for (int x = 0; x < ArraySize(output_dims, 1); ++x) { - for (int c = 0; c < ArraySize(output_dims, 0); ++c) { - const int32 input1_val = - input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; - const int32 input2_val = - input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; - const int32 unclamped_result = - output_offset + - MultiplyByQuantizedMultiplierSmallerThanOne( - input1_val / input2_val, output_multiplier, output_shift); - const int32 clamped_output = - std::min(output_activation_max, - std::max(output_activation_min, unclamped_result)); - output_data[Offset(output_dims, c, x, y, b)] = - static_cast(clamped_output); - } - } - } - } -} - // TODO(aselle): This is not actually optimized yet. inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, -- GitLab From f82d009d878dc675a307e69f89ba9f4dfdcd6c71 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Wed, 7 Mar 2018 21:58:39 +0800 Subject: [PATCH 0028/1262] Fix broken link of typical distributed configuration in graphs.md --- tensorflow/docs_src/programmers_guide/graphs.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index e69b717432..ca74b17542 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -210,9 +210,8 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. 
result = tf.matmul(weights, img) ``` -If you are deploying TensorFlow in a @{$deploy/distributed$typical distributed configuration}, -you might specify the job name and task ID to place variables on -a task in the parameter server job (`"/job:ps"`), and the other operations on + +If you are deploying TensorFlow in a typical @{$deploy/distributed} configuration, you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): ```python -- GitLab From 04b6127510793b4c5aaa540b60b68ffdf3fd48ce Mon Sep 17 00:00:00 2001 From: imsheridan Date: Wed, 7 Mar 2018 22:23:50 +0800 Subject: [PATCH 0029/1262] revert the minor space nit --- tensorflow/docs_src/programmers_guide/graphs.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index ca74b17542..3b5e3e5a9a 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -210,8 +210,9 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. 
result = tf.matmul(weights, img) ``` - -If you are deploying TensorFlow in a typical @{$deploy/distributed} configuration, you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on +If you are deploying TensorFlow in a typical @{$deploy/distributed} configuration, +you might specify the job name and task ID to place variables on +a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): ```python -- GitLab From 2548a3d2cf035a229d35ab6257bee511aa3a8e23 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Thu, 8 Mar 2018 00:15:22 +0800 Subject: [PATCH 0030/1262] fix some typo --- tensorflow/docs_src/programmers_guide/graphs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index 3b5e3e5a9a..f28660d44a 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -505,10 +505,10 @@ multiple graphs in the same process. As noted above, TensorFlow provides a "default graph" that is implicitly passed to all API functions in the same context. For many applications, a single graph is sufficient. However, TensorFlow also provides methods for manipulating -the default graph, which can be useful in more advanced used cases. For example: +the default graph, which can be useful in more advanced use cases. For example: * A @{tf.Graph} defines the namespace for @{tf.Operation} objects: each - operation in a single graph must have a unique name. TensorFlow will + operation in a single graph must have an unique name. TensorFlow will "uniquify" the names of operations by appending `"_1"`, `"_2"`, and so on to their names if the requested name is already taken. 
Using multiple explicitly created graphs gives you more control over what name is given to each -- GitLab From 955f41c5f2240495a086b503e54eac6928876aca Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 7 Mar 2018 14:04:26 -0800 Subject: [PATCH 0031/1262] Cleanup `astor` output to match `codegen` output. The default `astor` output messes up the function signature docs for many docs without a bit of cleanup. With this change the only differences I see are parens around lambdas and math expressions in default arguments. --- tensorflow/tools/docs/parser.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 1798378d55..0fcd0abc4a 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -650,6 +650,9 @@ def _remove_first_line_indent(string): return '\n'.join([line[indent:] for line in string.split('\n')]) +PAREN_NUMBER_RE = re.compile("^\(([0-9.e-]+)\)") + + def _generate_signature(func, reverse_index): """Given a function, returns a list of strings representing its args. @@ -705,7 +708,11 @@ def _generate_signature(func, reverse_index): if id(default) in reverse_index: default_text = reverse_index[id(default)] elif ast_default is not None: - default_text = astor.to_source(ast_default) + default_text = ( + astor.to_source(ast_default).rstrip('\n').replace('\t','\\t') + .replace('\n','\\n').replace('"""',"'")) + default_text = PAREN_NUMBER_RE.sub('\\1',default_text) + if default_text != repr(default): # This may be an internal name. If so, handle the ones we know about. # TODO(wicke): This should be replaced with a lookup in the index. 
-- GitLab From c22d11f4fcc2801d0a5de98a84461e03e1bcb674 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Wed, 7 Mar 2018 14:14:08 -0800 Subject: [PATCH 0032/1262] add back docs --- tensorflow/docs_src/community/documentation.md | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index 8d55148e48..f7b7ba14e5 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -148,7 +148,19 @@ viewing. Do not include url parameters in the source code URL. Before building the documentation, you must first set up your environment by doing the following: -1. If bazel is not installed on your machine, install it now. If you are on +1. If pip isn't installed on your machine, install it now by issuing the +following command: + + $ sudo easy_install pip + +2. Use pip to install mock and pandas by issuing the following + command (Note: If you are using + a [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage your + dependencies, you may not want to use sudo for these installations): + + $ sudo pip install mock pandas + +3. If bazel is not installed on your machine, install it now. If you are on Linux, install bazel by issuing the following command: $ sudo apt-get install bazel # Linux @@ -156,10 +168,10 @@ doing the following: If you are on Mac OS, find bazel installation instructions on [this page](https://bazel.build/versions/master/docs/install.html#mac-os-x). -2. Change directory to the top-level `tensorflow` directory of the TensorFlow +4. Change directory to the top-level `tensorflow` directory of the TensorFlow source code. -3. Run the `configure` script and answer its prompts appropriately for your +5. Run the `configure` script and answer its prompts appropriately for your system. 
$ ./configure -- GitLab From cbb517551964879dcb6eac2b00bf74db6c827975 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Wed, 7 Mar 2018 14:54:24 -0800 Subject: [PATCH 0033/1262] Revert "add back docs" This reverts commit c22d11f4fcc2801d0a5de98a84461e03e1bcb674. --- tensorflow/docs_src/community/documentation.md | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index f7b7ba14e5..8d55148e48 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -148,19 +148,7 @@ viewing. Do not include url parameters in the source code URL. Before building the documentation, you must first set up your environment by doing the following: -1. If pip isn't installed on your machine, install it now by issuing the -following command: - - $ sudo easy_install pip - -2. Use pip to install mock and pandas by issuing the following - command (Note: If you are using - a [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage your - dependencies, you may not want to use sudo for these installations): - - $ sudo pip install mock pandas - -3. If bazel is not installed on your machine, install it now. If you are on +1. If bazel is not installed on your machine, install it now. If you are on Linux, install bazel by issuing the following command: $ sudo apt-get install bazel # Linux @@ -168,10 +156,10 @@ following command: If you are on Mac OS, find bazel installation instructions on [this page](https://bazel.build/versions/master/docs/install.html#mac-os-x). -4. Change directory to the top-level `tensorflow` directory of the TensorFlow +2. Change directory to the top-level `tensorflow` directory of the TensorFlow source code. -5. Run the `configure` script and answer its prompts appropriately for your +3. Run the `configure` script and answer its prompts appropriately for your system. 
$ ./configure -- GitLab From d34eaf348848fe153a5fd245aa75c2ca32973b36 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Wed, 7 Mar 2018 21:53:25 -0800 Subject: [PATCH 0034/1262] fix encoding and lint --- tensorflow/tools/docs/build_docs_test.py | 1 - tensorflow/tools/docs/generate_lib.py | 13 ++++++------- tensorflow/tools/docs/parser.py | 6 +++--- tensorflow/tools/docs/py_guide_parser.py | 2 +- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/tensorflow/tools/docs/build_docs_test.py b/tensorflow/tools/docs/build_docs_test.py index 2e8f634e7c..0cbf8b478f 100644 --- a/tensorflow/tools/docs/build_docs_test.py +++ b/tensorflow/tools/docs/build_docs_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import os -import sys import textwrap import tensorflow as tf diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index 635408d87f..a7ab0fa538 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -21,7 +21,6 @@ from __future__ import print_function import argparse import fnmatch import os -import sys import six @@ -134,8 +133,8 @@ def write_docs(output_dir, parser_config, yaml_toc, root_title='TensorFlow'): try: if not os.path.exists(directory): os.makedirs(directory) - with open(path, 'w') as f: - f.write(pretty_docs.build_md_page(page_info)) + with open(path, 'wb') as f: + f.write(pretty_docs.build_md_page(page_info).encode('utf-8')) except OSError as e: print('Cannot write documentation for %s to %s: %s' % (full_name, directory, e)) @@ -434,19 +433,19 @@ def _other_docs(src_dir, output_dir, reference_resolver, file_pattern='*.md'): full_out_path = os.path.join(output_dir, suffix) if not fnmatch.fnmatch(base_name, file_pattern): print('Copying un-matched file %s...' 
% suffix) - open(full_out_path, 'w').write(open(full_in_path).read()) + open(full_out_path, 'wb').write(open(full_in_path, 'rb').read()) continue if dirpath.endswith('/api_guides/python'): print('Processing Python guide %s...' % base_name) content = tag_updater.process(full_in_path) else: print('Processing doc %s...' % suffix) - content = open(full_in_path).read() + content = open(full_in_path, 'rb').read().decode('utf-8') content = reference_resolver.replace_references(content, relative_path_to_root) - with open(full_out_path, 'w') as f: - f.write(content) + with open(full_out_path, 'wb') as f: + f.write(content.encode('utf-8')) print('Done.') diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 0fcd0abc4a..dd0351b4c6 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -709,9 +709,9 @@ def _generate_signature(func, reverse_index): default_text = reverse_index[id(default)] elif ast_default is not None: default_text = ( - astor.to_source(ast_default).rstrip('\n').replace('\t','\\t') - .replace('\n','\\n').replace('"""',"'")) - default_text = PAREN_NUMBER_RE.sub('\\1',default_text) + astor.to_source(ast_default).rstrip('\n').replace('\t', '\\t') + .replace('\n', '\\n').replace('"""', "'")) + default_text = PAREN_NUMBER_RE.sub('\\1', default_text) if default_text != repr(default): # This may be an internal name. If so, handle the ones we know about. 
diff --git a/tensorflow/tools/docs/py_guide_parser.py b/tensorflow/tools/docs/py_guide_parser.py index 216353ecee..328f42d18f 100644 --- a/tensorflow/tools/docs/py_guide_parser.py +++ b/tensorflow/tools/docs/py_guide_parser.py @@ -44,7 +44,7 @@ class PyGuideParser(object): def process(self, full_path): """Read and process the file at `full_path`.""" - md_string = open(full_path).read() + md_string = open(full_path, 'rb').read().decode('utf-8') self._lines = md_string.split('\n') seen = set() -- GitLab From f7a04228e0368f3c9bad22a66fe7267e41ecb128 Mon Sep 17 00:00:00 2001 From: DavidNorman Date: Thu, 8 Mar 2018 07:05:53 +0000 Subject: [PATCH 0035/1262] Register half in some ops which support all floating point types --- tensorflow/core/ops/nn_ops.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 910fbaca9e..6d4a3fda51 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -472,7 +472,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropInput") .Input("filter: T") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {bfloat16, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -490,7 +490,7 @@ REGISTER_OP("DepthwiseConv2dNativeBackpropFilter") .Input("filter_sizes: int32") .Input("out_backprop: T") .Output("output: T") - .Attr("T: {bfloat16, float, double}") + .Attr("T: {half, bfloat16, float, double}") .Attr("strides: list(int)") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) @@ -589,7 +589,7 @@ REGISTER_OP("AvgPool3D") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {bfloat16, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn(shape_inference::Pool3DShape); REGISTER_OP("AvgPool3DGrad") @@ -600,7 +600,7 @@ 
REGISTER_OP("AvgPool3DGrad") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {bfloat16, float, double}") + .Attr("T: {half, bfloat16, float, double}") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); @@ -618,7 +618,7 @@ REGISTER_OP("MaxPool3D") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {bfloat16, float}") + .Attr("T: {half, bfloat16, float}") .SetShapeFn(shape_inference::Pool3DShape); REGISTER_OP("MaxPool3DGrad") @@ -630,8 +630,8 @@ REGISTER_OP("MaxPool3DGrad") .Attr("strides: list(int) >= 5") .Attr(GetPaddingAttrString()) .Attr(GetConvnet3dDataFormatAttrString()) - .Attr("T: {bfloat16, float} = DT_FLOAT") - .Attr("TInput: {bfloat16, float} = DT_FLOAT") + .Attr("T: {half, bfloat16, float} = DT_FLOAT") + .Attr("TInput: {half, bfloat16, float} = DT_FLOAT") .SetShapeFn([](InferenceContext* c) { return UnchangedShapeWithRank(c, 5); }); -- GitLab From cee41f9d10b81ce3b49f566ddd448a7f3f2872c3 Mon Sep 17 00:00:00 2001 From: KB Sriram Date: Wed, 7 Mar 2018 08:11:03 -0800 Subject: [PATCH 0036/1262] C++ gradient for StridedSlice See https://github.com/tensorflow/tensorflow/issues/9645 --- tensorflow/cc/gradients/array_grad.cc | 36 ++++++++++++++++++++++ tensorflow/cc/gradients/array_grad_test.cc | 24 +++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc index 6545e4ee3e..ff348fadb2 100644 --- a/tensorflow/cc/gradients/array_grad.cc +++ b/tensorflow/cc/gradients/array_grad.cc @@ -385,6 +385,42 @@ Status MirrorPadGradGrad(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("MirrorPadGrad", MirrorPadGradGrad); +Status StridedSliceGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + Input x = Shape(scope, 
op.input(0)); + Input begin = op.input(1); + Input end = op.input(2); + Input strides = op.input(3); + int64 begin_mask; + int64 end_mask; + int64 ellipsis_mask; + int64 new_axis_mask; + int64 shrink_axis_mask; + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "begin_mask", &begin_mask)); + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "end_mask", &end_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "ellipsis_mask", &ellipsis_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "new_axis_mask", &new_axis_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "shrink_axis_mask", &shrink_axis_mask)); + grad_outputs->push_back( + StridedSliceGrad(scope, x, begin, end, strides, grad_inputs[0], + StridedSliceGrad::BeginMask(begin_mask) + .EndMask(end_mask) + .EllipsisMask(ellipsis_mask) + .NewAxisMask(new_axis_mask) + .ShrinkAxisMask(shrink_axis_mask))); + // No gradients returned for begin, end and strides + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc index 4a215fcc92..2a2180297c 100644 --- a/tensorflow/cc/gradients/array_grad_test.cc +++ b/tensorflow/cc/gradients/array_grad_test.cc @@ -354,5 +354,29 @@ TEST_F(ArrayGradTest, MirrorPadGradGrad_Symmetric) { RunTest(x, x_shape, y, y_shape); } +TEST_F(ArrayGradTest, StridedSliceGrad) { + TensorShape x_shape({6, 4, 4}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + + // y = x[2:6:2, 1:3, 1:3] + auto y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}); + // y.shape = [2, 2, 2]; + RunTest(x, x_shape, y, {2, 2, 2}); + + // y = x[2:6:2, 1:3, 1:3] + // begin_mask = 1<<1 (ignore begin_index = 1) + // end_mask = 
1<<2 (ignore end_index = 2) + y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}, + StridedSlice::BeginMask(1<<1).EndMask(1<<2)); + // y.shape = [2, 3, 3]; + RunTest(x, x_shape, y, {2, 3, 3}); + + // y = [tf.newaxis, 2:6:2, 1:3, 1:3] + y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1}, + StridedSlice::NewAxisMask(1<<0)); + // y.shape = [1, 2, 2, 2]; + RunTest(x, x_shape, y, {1, 2, 2, 2}); +} + } // namespace } // namespace tensorflow -- GitLab From e31fb25f4e3989a846a8e54d789a3bf5efff0cea Mon Sep 17 00:00:00 2001 From: KB Sriram Date: Thu, 8 Mar 2018 07:40:24 -0800 Subject: [PATCH 0037/1262] Clang-format fixes. --- tensorflow/cc/gradients/array_grad_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc index 2a2180297c..de3bd0fc9e 100644 --- a/tensorflow/cc/gradients/array_grad_test.cc +++ b/tensorflow/cc/gradients/array_grad_test.cc @@ -367,13 +367,13 @@ TEST_F(ArrayGradTest, StridedSliceGrad) { // begin_mask = 1<<1 (ignore begin_index = 1) // end_mask = 1<<2 (ignore end_index = 2) y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}, - StridedSlice::BeginMask(1<<1).EndMask(1<<2)); + StridedSlice::BeginMask(1 << 1).EndMask(1 << 2)); // y.shape = [2, 3, 3]; RunTest(x, x_shape, y, {2, 3, 3}); // y = [tf.newaxis, 2:6:2, 1:3, 1:3] y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1}, - StridedSlice::NewAxisMask(1<<0)); + StridedSlice::NewAxisMask(1 << 0)); // y.shape = [1, 2, 2, 2]; RunTest(x, x_shape, y, {1, 2, 2, 2}); } -- GitLab From d6533df7cd3ef19b39081a64fcb0bed5f83c7ee0 Mon Sep 17 00:00:00 2001 From: Giuseppe Date: Thu, 8 Mar 2018 17:49:29 +0100 Subject: [PATCH 0038/1262] Fix markdown error in layers tutorial. 
--- tensorflow/docs_src/tutorials/layers.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index ee03f440c9..b24d3f4cad 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -192,8 +192,7 @@ dive deeper into the `tf.layers` code used to create each layer, as well as how to calculate loss, configure the training op, and generate predictions. If you're already experienced with CNNs and @{$get_started/custom_estimators$TensorFlow `Estimator`s}, and find the above code intuitive, you may want to skim these sections or just -skip ahead to ["Training and Evaluating the CNN MNIST -Classifier"](#training-and-evaluating-the-cnn-mnist-classifier). +skip ahead to ["Training and Evaluating the CNN MNIST Classifier"](#training_and_evaluating_the_cnn_mnist_classifier). ### Input Layer @@ -534,9 +533,8 @@ if mode == tf.estimator.ModeKeys.TRAIN: ``` > Note: For a more in-depth look at configuring training ops for Estimator model -> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining -> the training op for the model"} in the @{$get_started/custom_estimators$"Creating Estimations in -> tf.estimator"} tutorial. +> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining the training op for the model"} +> in the @{$get_started/custom_estimators$"Creating Estimations in tf.estimator"} tutorial. ### Add evaluation metrics -- GitLab From e8cf1fb7dc9dabe1a2a0b181a7b587c1300888a3 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Thu, 8 Mar 2018 14:07:30 -0800 Subject: [PATCH 0039/1262] Use getfullargspec in signature parsing. 
--- tensorflow/python/util/tf_inspect.py | 36 ++++++++++++++++++++++------ tensorflow/tools/docs/parser.py | 34 +++++++++++++------------- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index c4168f7b1a..1fbc33ba0b 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -18,12 +18,22 @@ from __future__ import division from __future__ import print_function import inspect as _inspect +import six +from collections import namedtuple from tensorflow.python.util import tf_decorator ArgSpec = _inspect.ArgSpec +if six.PY3: + FullArgSpec = _inspect.FullArgSpec +else: + FullArgSpec = namedtuple( + 'FullArgSpec', ['args', 'varargs', 'varkw', 'defaults', + 'kwonlyargs', 'kwonlydefaults', 'annotations']) + + def currentframe(): """TFDecorator-aware replacement for inspect.currentframe.""" return _inspect.stack()[1][0] @@ -46,20 +56,32 @@ def getargspec(object): # pylint: disable=redefined-builtin def getfullargspec(obj): # pylint: disable=redefined-builtin - """TFDecorator-aware replacement for inspect.getfullargspec and fallback to - inspect.getargspec in Python 2. + """TFDecorator-aware replacement for inspect.getfullargspec. Args: obj: A callable, possibly decorated. Returns: - The `FullArgSpec` (`ArgSpec` in Python 2) that describes the signature of + The `FullArgSpec` that describes the signature of the outermost decorator that changes the callable's signature. If the - callable is not decorated, `inspect.getfullargspec()` - (`inspect.getargspec()` in Python 2) will be called directly on the - callable. + callable is not decorated, `inspect.getfullargspec()` will be called + directly on the callable. 
""" - spec_fn = getattr(_inspect, 'getfullargspec', getattr(_inspect, 'getargspec')) + if six.PY2: + def spec_fn(target): + argspecs = _inspect.getargspec(target) + fullargspecs = FullArgSpec( + args=argspecs.args, + varargs=argspecs.varargs, + varkw=argspecs.keywords, + defaults=argspecs.defaults, + kwonlyargs=[], + kwonlydefaults={}, + annotations={}) + return fullargspecs + else: + spec_fn = _inspect.getfullargspec + decorators, target = tf_decorator.unwrap(obj) return next((d.decorator_argspec for d in decorators if d.decorator_argspec is not None), spec_fn(target)) diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index dd0351b4c6..16513d0ee1 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -601,20 +601,20 @@ def _parse_md_docstring(py_object, relative_path_to_root, reference_resolver): def _get_arg_spec(func): """Extracts signature information from a function or functools.partial object. - For functions, uses `tf_inspect.getargspec`. For `functools.partial` objects, - corrects the signature of the underlying function to take into account the - removed arguments. + For functions, uses `tf_inspect.getfullargspec`. For `functools.partial` + objects, corrects the signature of the underlying function to take into + account the removed arguments. Args: func: A function whose signature to extract. Returns: - An `ArgSpec` namedtuple `(args, varargs, keywords, defaults)`, as returned - by `tf_inspect.getargspec`. + An `FullArgSpec` namedtuple `(args, varargs, varkw, defaults, etc.)`, + as returned by `tf_inspect.getfullargspec`. """ - # getargspec does not work for functools.partial objects directly. + # getfullargspec does not work for functools.partial objects directly. if isinstance(func, functools.partial): - argspec = tf_inspect.getargspec(func.func) + argspec = tf_inspect.getfullargspec(func.func) # Remove the args from the original function that have been used up. 
first_default_arg = ( len(argspec.args or []) - len(argspec.defaults or [])) @@ -637,12 +637,14 @@ def _get_arg_spec(func): argspec_defaults.pop(i-first_default_arg) else: first_default_arg -= 1 - return tf_inspect.ArgSpec(args=argspec_args, - varargs=argspec.varargs, - keywords=argspec.keywords, - defaults=tuple(argspec_defaults)) + # NOTE Some fields from FullArgSpec were removed here. + # Add them back if needed in the future. + return tf_inspect.FullArgSpec(args=argspec_args, + varargs=argspec.varargs, + varkw=argspec.varkw, + defaults=tuple(argspec_defaults)) else: # Regular function or method, getargspec will work fine. - return tf_inspect.getargspec(func) + return tf_inspect.getfullargspec(func) def _remove_first_line_indent(string): @@ -657,7 +659,7 @@ def _generate_signature(func, reverse_index): """Given a function, returns a list of strings representing its args. This function produces a list of strings representing the arguments to a - python function. It uses tf_inspect.getargspec, which + python function. It uses tf_inspect.getfullargspec, which does not generalize well to Python 3.x, which is more flexible in how *args and **kwargs are handled. This is not a problem in TF, since we have to remain compatible to Python 2.7 anyway. @@ -710,7 +712,7 @@ def _generate_signature(func, reverse_index): elif ast_default is not None: default_text = ( astor.to_source(ast_default).rstrip('\n').replace('\t', '\\t') - .replace('\n', '\\n').replace('"""', "'")) + .replace('\n', '\\n').replace('"""', "'")) default_text = PAREN_NUMBER_RE.sub('\\1', default_text) if default_text != repr(default): @@ -745,8 +747,8 @@ def _generate_signature(func, reverse_index): # Add *args and *kwargs. 
if argspec.varargs: args_list.append('*' + argspec.varargs) - if argspec.keywords: - args_list.append('**' + argspec.keywords) + if argspec.varkw: + args_list.append('**' + argspec.varkw) return args_list -- GitLab From 8cf2a1f0db40174cd6feab96c07e47ba8349d11c Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Thu, 8 Mar 2018 14:18:54 -0800 Subject: [PATCH 0040/1262] fix encoding again --- tensorflow/tools/docs/generate_lib.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index a7ab0fa538..d9e8069a61 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -133,8 +133,12 @@ def write_docs(output_dir, parser_config, yaml_toc, root_title='TensorFlow'): try: if not os.path.exists(directory): os.makedirs(directory) + # This function returns raw bytes in PY2 or unicode in PY3. + text = pretty_docs.build_md_page(page_info) + if six.PY3: + text = text.encode('utf-8') with open(path, 'wb') as f: - f.write(pretty_docs.build_md_page(page_info).encode('utf-8')) + f.write(text) except OSError as e: print('Cannot write documentation for %s to %s: %s' % (full_name, directory, e)) -- GitLab From b4db970c338123ee3156bb0e216193bde35d4b17 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Tue, 13 Mar 2018 00:04:33 +0800 Subject: [PATCH 0041/1262] fix broken link of tensor-like type --- tensorflow/docs_src/programmers_guide/graphs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index f28660d44a..81fd99cb4a 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -362,7 +362,7 @@ operations that are needed to compute the result. 
@{tf.Session.run} requires you to specify a list of **fetches**, which determine the return values, and may be a @{tf.Operation}, a @{tf.Tensor}, or -a [tensor-like type](#tensor-like-objects) such as @{tf.Variable}. These fetches +a [tensor-like type](#tensor-like_objects) such as @{tf.Variable}. These fetches determine what **subgraph** of the overall @{tf.Graph} must be executed to produce the result: this is the subgraph that contains all operations named in the fetch list, plus all operations whose outputs are used to compute the value -- GitLab From 66b38c5e7af4b607f393973d18aaabb6e00f9723 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 12 Mar 2018 12:56:59 -0700 Subject: [PATCH 0042/1262] Block docs for str, repr, hash. No python2 code is generating useful docs for these, and in python3 many useless docs are generated, so I've blocked them. --- tensorflow/tools/docs/parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 5f2a411bae..95155b1149 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -1127,7 +1127,8 @@ class _ClassPageInfo(object): # Remove builtin members that we never want to document. if short_name in ['__class__', '__base__', '__weakref__', '__doc__', '__module__', '__dict__', '__abstractmethods__', - '__slots__', '__getnewargs__']: + '__slots__', '__getnewargs__', '__str__', + '__repr__', '__hash__']: continue child_name = '.'.join([self.full_name, short_name]) @@ -1172,7 +1173,7 @@ class _ClassPageInfo(object): # obvious what they do, don't include them in the docs if there's no # docstring. if not child_doc.brief.strip() and short_name in [ - '__str__', '__repr__', '__hash__', '__del__', '__copy__']: + '__del__', '__copy__']: print('Skipping %s, defined in %s, no docstring.' 
% (child_name, defining_class)) continue -- GitLab From 1f03b013ef00c128cf8331f274524a23d86ac458 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Tue, 13 Mar 2018 16:44:57 +0800 Subject: [PATCH 0043/1262] revert wrong typo fix --- tensorflow/docs_src/programmers_guide/graphs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index 81fd99cb4a..69eb6df5f6 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -508,7 +508,7 @@ is sufficient. However, TensorFlow also provides methods for manipulating the default graph, which can be useful in more advanced use cases. For example: * A @{tf.Graph} defines the namespace for @{tf.Operation} objects: each - operation in a single graph must have an unique name. TensorFlow will + operation in a single graph must have a unique name. TensorFlow will "uniquify" the names of operations by appending `"_1"`, `"_2"`, and so on to their names if the requested name is already taken. Using multiple explicitly created graphs gives you more control over what name is given to each -- GitLab From d751b6bfa84dae1be9835fc40cc3094a8205a74e Mon Sep 17 00:00:00 2001 From: imsheridan Date: Tue, 13 Mar 2018 23:11:47 +0800 Subject: [PATCH 0044/1262] Fix link of typical distributed configuration --- tensorflow/docs_src/programmers_guide/graphs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index 69eb6df5f6..e4095cf7dd 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -210,7 +210,7 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. 
result = tf.matmul(weights, img) ``` -If you are deploying TensorFlow in a typical @{$deploy/distributed} configuration, +If you are deploying TensorFlow in a @{$distributed$typical distributed configuration}, you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): -- GitLab From b618740a8754e85a2a6ee142028105f76a4d5d58 Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Fri, 16 Mar 2018 00:11:38 +0900 Subject: [PATCH 0045/1262] implement matrix 2-norm --- tensorflow/python/ops/linalg_ops.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 37470e00d7..110b766a6e 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -454,7 +454,7 @@ def norm(tensor, This function can compute several different vector norms (the 1-norm, the Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and - matrix norms (Frobenius, 1-norm, and inf-norm). + matrix norms (Frobenius, 1-norm, 2-norm and inf-norm). Args: tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128` @@ -465,7 +465,7 @@ def norm(tensor, Some restrictions apply: a) The Frobenius norm `fro` is not defined for vectors, b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`, - `np.inf` are supported. + `2`, `np.inf` are supported. See the description of `axis` on how to compute norms for a batch of vectors or matrices stored in a tensor. axis: If `axis` is `None` (the default), the input is considered a vector @@ -521,8 +521,7 @@ def norm(tensor, axis[0] == axis[1]): raise ValueError( "'axis' must be None, an integer, or a tuple of 2 unique integers") - # TODO(rmlarsen): Implement matrix 2-norm using tf.svd(). 
- supported_matrix_norms = ['euclidean', 'fro', 1, np.inf] + supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf] if ord not in supported_matrix_norms: raise ValueError("'ord' must be a supported matrix norm in %s, got %s" % (supported_matrix_norms, ord)) @@ -539,10 +538,20 @@ def norm(tensor, with ops.name_scope(name, 'norm', [tensor]): tensor = ops.convert_to_tensor(tensor) + rank = len(tensor.get_shape().as_list()) + axis = tuple(map(lambda i: i if i >= 0 else i + rank, axis)) + if ord in ['fro', 'euclidean', 2, 2.0]: - # TODO(rmlarsen): Move 2-norm to a separate clause once we support it for - # matrices. - result = math_ops.sqrt( + if is_matrix_norm and ord in [2, 2.0]: + axes = list(range(rank)) + perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis) + perm_after = list(map(lambda i: perm_before.index(i), axes)) + result = array_ops.transpose(array_ops.expand_dims(math_ops.reduce_max( + gen_linalg_ops.svd(array_ops.transpose(tensor, perm=perm_before), + compute_uv=False)[0], axis=-1, keepdims=True), + axis=-1), perm=perm_after) + else: + result = math_ops.sqrt( math_ops.reduce_sum( tensor * math_ops.conj(tensor), axis, keepdims=True)) else: -- GitLab From a280a1d0cfd64831857826db639a3ee0180094de Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Fri, 16 Mar 2018 00:32:34 +0900 Subject: [PATCH 0046/1262] follow python coding style --- tensorflow/python/ops/linalg_ops.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 110b766a6e..b467711e3b 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -546,14 +546,15 @@ def norm(tensor, axes = list(range(rank)) perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis) perm_after = list(map(lambda i: perm_before.index(i), axes)) - result = array_ops.transpose(array_ops.expand_dims(math_ops.reduce_max( - 
gen_linalg_ops.svd(array_ops.transpose(tensor, perm=perm_before), - compute_uv=False)[0], axis=-1, keepdims=True), - axis=-1), perm=perm_after) + result = array_ops.transpose(array_ops.expand_dims( + math_ops.reduce_max(gen_linalg_ops.svd( + array_ops.transpose(tensor, perm=perm_before), + compute_uv=False)[0], axis=-1, keepdims=True), axis=-1), + perm=perm_after) else: result = math_ops.sqrt( - math_ops.reduce_sum( - tensor * math_ops.conj(tensor), axis, keepdims=True)) + math_ops.reduce_sum( + tensor * math_ops.conj(tensor), axis, keepdims=True)) else: result = math_ops.abs(tensor) if ord == 1: -- GitLab From cc10ac9b7d593375a7cee0c167c20989dc29e8cf Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Fri, 16 Mar 2018 00:40:05 +0900 Subject: [PATCH 0047/1262] remove unnecessary lambda --- tensorflow/python/ops/linalg_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index b467711e3b..db6ce71125 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -545,7 +545,7 @@ def norm(tensor, if is_matrix_norm and ord in [2, 2.0]: axes = list(range(rank)) perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis) - perm_after = list(map(lambda i: perm_before.index(i), axes)) + perm_after = list(map(perm_before.index, axes)) result = array_ops.transpose(array_ops.expand_dims( math_ops.reduce_max(gen_linalg_ops.svd( array_ops.transpose(tensor, perm=perm_before), -- GitLab From b21ceeb518ca9462a247d8be05870f12bebad201 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 15 Mar 2018 23:13:25 -0700 Subject: [PATCH 0048/1262] Enhancement with deprecated_argument_lookup for argmax This fix makes some enhancement for argmax, using deprecated_argument_lookup instead of customized logic.
Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index e18d0e9501..9a88b71398 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -208,11 +208,9 @@ def argmax(input, name=None, dimension=None, output_type=dtypes.int64): - if dimension is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dimension'") - axis = dimension - elif axis is None: + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "dimension", dimension) + if axis is None: axis = 0 return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type) -- GitLab From 82571ca199869f60fe2036d15d0071031d997b47 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 15 Mar 2018 23:15:37 -0700 Subject: [PATCH 0049/1262] Enhancement with deprecated_argument_lookup for argmin This fix makes some enhancement for argmin, using deprecated_argument_lookup instead of customized logic.
Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 9a88b71398..a2892d206d 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -226,11 +226,9 @@ def argmin(input, name=None, dimension=None, output_type=dtypes.int64): - if dimension is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dimension'") - axis = dimension - elif axis is None: + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "dimension", dimension) + if axis is None: axis = 0 return gen_math_ops.arg_min(input, axis, name=name, output_type=output_type) -- GitLab From 52fef7f6b8b41d4fffa92bddcb78d96eb6333051 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Fri, 16 Mar 2018 16:03:26 +0900 Subject: [PATCH 0050/1262] fix typo --- tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index 272410c693..7651a03fe5 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -398,7 +398,7 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNorms) { } TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) { - // Test axis is not 3, so all weigths and offsets are fused to each of inputs + // Test axis is not 3, so all weights and offsets are fused to each of inputs // of conv2d. 
TestFoldFusedBatchNormsWithConcat(/*split=*/true); // Test axis = 3, BatchNorm weights and offsets will be split before fused -- GitLab From 20424e92417b520d7ea8c7323eee46538d2b909f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 17 Mar 2018 09:30:24 +0800 Subject: [PATCH 0051/1262] CLN: remove the unused import: tf_export --- tensorflow/contrib/opt/python/training/adamax.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index fe5522a170..65918831e9 100644 --- a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -26,7 +26,6 @@ from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.training import adam from tensorflow.python.training import training_ops -from tensorflow.python.util.tf_export import tf_export class AdaMaxOptimizer(adam.AdamOptimizer): -- GitLab From b5ebb7e9e5f5ae59e6db93bb5950f4bb68bf9e18 Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Sun, 18 Mar 2018 00:48:46 +0900 Subject: [PATCH 0052/1262] update norm_op_test --- tensorflow/python/kernel_tests/norm_op_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/norm_op_test.py b/tensorflow/python/kernel_tests/norm_op_test.py index d85512fae6..d6625b69ef 100644 --- a/tensorflow/python/kernel_tests/norm_op_test.py +++ b/tensorflow/python/kernel_tests/norm_op_test.py @@ -85,8 +85,6 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_): if ((not is_matrix_norm and ord_ == "fro") or (is_matrix_norm and is_fancy_p_norm)): self.skipTest("Not supported by neither numpy.linalg.norm nor tf.norm") - if is_matrix_norm and ord_ == 2: - self.skipTest("Not supported by tf.norm") if ord_ == 'euclidean' or (axis_ is None and len(shape) > 2): self.skipTest("Not supported by 
numpy.linalg.norm") matrix = np.random.randn(*shape_).astype(dtype_) -- GitLab From c53160a2a5decdae30bda6e8f40b45f3b4dd9f8e Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Sun, 18 Mar 2018 00:49:13 +0900 Subject: [PATCH 0053/1262] use tf function instead of np --- tensorflow/python/ops/linalg_ops.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index db6ce71125..d8150d85b9 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import math_ops # pylint: disable=wildcard-import @@ -538,19 +539,27 @@ def norm(tensor, with ops.name_scope(name, 'norm', [tensor]): tensor = ops.convert_to_tensor(tensor) - rank = len(tensor.get_shape().as_list()) - axis = tuple(map(lambda i: i if i >= 0 else i + rank, axis)) if ord in ['fro', 'euclidean', 2, 2.0]: if is_matrix_norm and ord in [2, 2.0]: - axes = list(range(rank)) - perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis) - perm_after = list(map(perm_before.index, axes)) - result = array_ops.transpose(array_ops.expand_dims( - math_ops.reduce_max(gen_linalg_ops.svd( - array_ops.transpose(tensor, perm=perm_before), - compute_uv=False)[0], axis=-1, keepdims=True), axis=-1), - perm=perm_after) + rank = array_ops.rank(tensor) + axis = functional_ops.map_fn( + lambda i: control_flow_ops.cond(i >= 0, lambda: i, + lambda: i + rank), + ops.convert_to_tensor(axis)).eval() + axes = math_ops.range(rank) + perm_before = array_ops.concat( + [array_ops.setdiff1d(axes, axis)[0], axis], axis=0) + perm_after = functional_ops.map_fn( + lambda i: 
math_ops.cast( + array_ops.squeeze( + array_ops.where(math_ops.equal(perm_before, i))), + dtype=dtypes.int32), axes) + permed = array_ops.transpose(tensor, perm=perm_before) + matrix_2_norm = array_ops.expand_dims( + math_ops.reduce_max(gen_linalg_ops.svd(permed, compute_uv=False)[0], + axis=-1, keepdims=True), axis=-1) + result = array_ops.transpose(matrix_2_norm, perm=perm_after) else: result = math_ops.sqrt( math_ops.reduce_sum( -- GitLab From fda633fb7187da8522ef79555d1267996fa983bc Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Sun, 18 Mar 2018 21:29:16 +0900 Subject: [PATCH 0054/1262] remove test code --- tensorflow/python/ops/linalg_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index d8150d85b9..608b72c574 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -546,7 +546,7 @@ def norm(tensor, axis = functional_ops.map_fn( lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda: i + rank), - ops.convert_to_tensor(axis)).eval() + ops.convert_to_tensor(axis)) axes = math_ops.range(rank) perm_before = array_ops.concat( [array_ops.setdiff1d(axes, axis)[0], axis], axis=0) -- GitLab From a34a3b2035ca0cfd48488c03bd4b088070bf9a25 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Thu, 22 Mar 2018 14:32:12 -0700 Subject: [PATCH 0055/1262] Fixing the issue where MKL-DNN is getting built when not using --config=mkl --- tensorflow/tensorflow.bzl | 53 +++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 9b0db8a112..8549c34691 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -788,7 +788,33 @@ def tf_cc_test_mkl(srcs, tags=[], size="medium", args=None): - if_mkl(tf_cc_tests(srcs, deps, name, linkstatic=linkstatic, tags=tags, size=size, args=args, nocopts="-fno-exceptions")) + for src in srcs: + 
native.cc_test( + name=src_to_test_name(src), + srcs=if_mkl([src]) + tf_binary_additional_srcs(), + copts=tf_copts(), + linkopts=select({ + clean_dep("//tensorflow:android"): [ + "-pie", + ], + clean_dep("//tensorflow:windows"): [], + clean_dep("//tensorflow:windows_msvc"): [], + "//conditions:default": [ + "-lpthread", + "-lm" + ], + }) + _rpath_linkopts(src_to_test_name(src)), + deps=deps + if_mkl( + [ + "//third_party/mkl:intel_binary_blob", + ], + ), + linkstatic=linkstatic, + tags=tags, + size=size, + args=args, + nocopts="-fno-exceptions") + def tf_cc_tests_gpu(srcs, deps, @@ -1006,16 +1032,12 @@ register_extension_info( def tf_mkl_kernel_library(name, prefix=None, srcs=None, - gpu_srcs=None, hdrs=None, deps=None, alwayslink=1, copts=tf_copts(), - nocopts="-fno-exceptions", - **kwargs): + nocopts="-fno-exceptions"): """A rule to build MKL-based TensorFlow kernel libraries.""" - gpu_srcs = gpu_srcs # unused argument - kwargs = kwargs # unused argument if not bool(srcs): srcs = [] @@ -1028,16 +1050,15 @@ def tf_mkl_kernel_library(name, hdrs = hdrs + native.glob( [prefix + "*.h"]) - if_mkl( - native.cc_library( - name=name, - srcs=srcs, - hdrs=hdrs, - deps=deps, - alwayslink=alwayslink, - copts=copts, - nocopts=nocopts - )) + native.cc_library( + name=name, + srcs=if_mkl(srcs), + hdrs=hdrs, + deps=deps, + alwayslink=alwayslink, + copts=copts, + nocopts=nocopts + ) register_extension_info( extension_name = "tf_mkl_kernel_library", -- GitLab From 341f906e7b6011de4d4a10380a17040abc8bdf5e Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 22 Mar 2018 23:49:47 -0700 Subject: [PATCH 0056/1262] Do not follow control edges in segmenter and in conversion and (#17936) gracefully handle some failures (cherry picked from commit 5daa95eeeae66b21fc60e08bf0f7c35b3df517f6) (cherry picked from commit ee87a13583001dd9b19cb5272f85d227ad59297f) (cherry picked from commit 9a1e6b0e9ca25da050f5a1866235189e6db528ae) and squashed --- .../contrib/tensorrt/convert/convert_graph.cc | 19 
+++-- .../contrib/tensorrt/convert/convert_nodes.cc | 82 +++++++++++++------ .../contrib/tensorrt/segment/segment.cc | 55 +++++++++---- tensorflow/contrib/tensorrt/segment/segment.h | 4 +- .../contrib/tensorrt/segment/segment_test.cc | 8 +- 5 files changed, 109 insertions(+), 59 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 90447ee666..ff8cc6374d 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -49,7 +49,7 @@ namespace tensorrt { namespace convert { namespace { -bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { +bool IsTensorRTCandidate(const tensorflow::Node* node) { // LINT.IfChange // TODO(jie): Segmentation shouldn't associated with op name. // Split it into a registration for each kernel. @@ -75,7 +75,7 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // TODO(ben,jie): ... }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) - return candidate_ops.count(node_def.op()); + return candidate_ops.count(node->type_string()); } void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, @@ -85,10 +85,10 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, const tensorflow::Node* node = graph.FindNodeId(node_id); for (const tensorflow::Edge* edge : node->in_edges()) { if (!subgraph_node_ids.count(edge->src()->id()) && - !edge->src()->IsSource()) { + !edge->src()->IsSource() && !edge->IsControlEdge()) { incoming_edges->insert(edge); } else { - VLOG(2) << edge->src()->name() << " N, "; + VLOG(2) << node->name() << " -> " << edge->src()->name() << " N, "; } } } @@ -101,11 +101,11 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, const tensorflow::Node* node = graph.FindNodeId(node_id); for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && - !edge->dst()->IsSink()) 
{ - VLOG(2) << edge->dst()->name() << " Y, "; + !edge->dst()->IsSink() && !edge->IsControlEdge()) { + VLOG(2) << node->name() << " -> " << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); } else { - VLOG(2) << edge->dst()->name() << " N, "; + VLOG(2) << node->name() << " -> " << edge->dst()->name() << " N, "; } } } @@ -410,8 +410,9 @@ tensorflow::Status ConvertGraphDefToTensorRT( tensorflow::Status status = ConvertSubGraphToTensorRT(&p); if (status != tensorflow::Status::OK()) { LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count - << " due to: \n" - << status.ToString() << " SKIPPING......"; + << " due to: \"" << status.ToString() + << "\" SKIPPING......( " << subgraph_node_names.size() + << " nodes)"; } count++; } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 979b5648c2..f22502aaeb 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -53,8 +53,8 @@ limitations under the License. 
namespace tensorflow { namespace tensorrt { namespace convert { +using ::tensorflow::strings::StrAppend; using ::tensorflow::strings::StrCat; - namespace { inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, @@ -429,9 +429,8 @@ class Converter { tensorflow::tensorrt::TRTWeightStore* weight_store_; bool fp16_; void register_op_converters(); - std::vector get_inputs( - const tensorflow::NodeDef& node_def) { - std::vector inputs; + tensorflow::Status get_inputs(const tensorflow::NodeDef& node_def, + std::vector* inputs) { for (auto const& input_name : node_def.input()) { /************************************************************************* * TODO(jie) handle case 1) here @@ -452,13 +451,17 @@ class Converter { VLOG(2) << "retrieve input: " << name; if (trt_tensors_.count(name)) { - inputs.push_back(trt_tensors_.at(name)); + inputs->push_back(trt_tensors_.at(name)); } else { - LOG(FATAL) << "input: " << name << " not availabled for node at, " - << node_def.name(); + string str("Node "); + StrAppend(&str, node_def.name(), " should have an input named '", name, + "' but it is not available"); + LOG(WARNING) << "input: " << name << " not available for node at " + << node_def.name(); + return tensorflow::errors::InvalidArgument(str); } } - return inputs; + return tensorflow::Status::OK(); } public: @@ -482,7 +485,8 @@ class Converter { } tensorflow::Status convert_node(const tensorflow::NodeDef& node_def) { - std::vector inputs = this->get_inputs(node_def); + std::vector inputs; + TF_RETURN_IF_ERROR(this->get_inputs(node_def, &inputs)); string op = node_def.op(); if (!op_registry_.count(op)) { return tensorflow::errors::Unimplemented( @@ -887,7 +891,7 @@ tensorflow::Status BinaryTensorOpWeight( // Check type consistency nvinfer1::DataType ttype; - TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); + TF_RETURN_IF_ERROR(ConvertDType(weights.type_, &ttype)); // Check scale mode auto dims_w = weights.shape_; @@ -1152,9 +1156,9 @@ tensorflow::Status 
BinaryTensorOpTensor( CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); + return tensorflow::errors::Unimplemented( + "binary op: " + node_def.op() + + " not supported at: " + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -1397,8 +1401,11 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.d[0] = weights_tensor.float_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); + LOG(WARNING) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); + string err_str("Broadcast method is not supported for '"); + StrAppend(&err_str, node_def.name(), "' of type ", node_def.op()); + return tensorflow::errors::InvalidArgument(err_str); } } } else { @@ -1436,8 +1443,11 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.d[0] = weights_tensor.int_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); + LOG(WARNING) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); + string err_str("Broadcast method is not supported for '"); + StrAppend(&err_str, node_def.name(), "' of type ", node_def.op()); + return tensorflow::errors::InvalidArgument(err_str); } } } else { @@ -2139,8 +2149,11 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( calib_res->thr_->join(); delete calib_res->thr_; if (!calib_res->engine_) { - LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run " + LOG(ERROR) << "Calibration failed!, engine does not exist. 
Did you run " "calibration graph?"; + return tensorflow::errors::FailedPrecondition( + "Calibration graph needs to be executed on" + " calibration data before convertsion to inference graph"); } auto weight_rmgr = trt_rm->getManager("WeightStore"); TF_CHECK_OK(weight_rmgr->Delete( @@ -2177,7 +2190,7 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( return status; } auto trt_engine_node = graph.AddNode(engine_node, &status); - TF_CHECK_OK(status); + TF_RETURN_IF_ERROR(status); for (size_t i = 0; i < out_edges.size(); i++) { VLOG(1) << "Connecting trt_engine_node output " << i << " with " << out_edges.at(i)->dst()->name() << " port " @@ -2275,6 +2288,12 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); + auto type_status = ConvertDType(tf_dtype, &dtype); + if (type_status != tensorflow::Status::OK()) { + LOG(WARNING) << "Data type conversion for input '" << node_name + << "' failed"; + return type_status; + } TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); VLOG(2) << "accessing output index of: " << output_idx @@ -2342,8 +2361,8 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument( - "Output node is weights not tensor"); + return tensorflow::errors::InvalidArgument("Output node'" + tensor_name + + "' is weights not tensor"); } nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); if (!tensor) { @@ -2500,7 +2519,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); + auto type_status = ConvertDType(tf_dtype, &dtype); + if (type_status != tensorflow::Status::OK()) { + LOG(WARNING) << "Type 
conversion failed for " << node_name; + return type_status; + } VLOG(2) << "Accessing output index of: " << output_idx << ", at node: " << node_name @@ -2511,8 +2534,12 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // TODO(jie): TRT 3.x only support 4 dimensional input tensor. // update the code once TRT 4.0 comes out. - if (op_info.shape().dim_size() != 4) - return tensorflow::errors::Unimplemented("require 4 dimensional input"); + if (op_info.shape().dim_size() != 4) { + string err_str = "Require 4 dimensional input."; + StrAppend(&err_str, " Got ", op_info.shape().dim_size(), " ", + shape_inference_node_name); + return tensorflow::errors::Unimplemented(err_str); + } for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i @@ -2573,8 +2600,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument( - "Output node is weights not tensor"); + return tensorflow::errors::InvalidArgument("Output node '" + tensor_name + + "' is weights not tensor"); } nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); if (!tensor) { @@ -2618,7 +2645,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } TF_RETURN_IF_ERROR(weight_rmgr->Delete( engine_name, engine_name)); - LOG(INFO) << "finished engine " << engine_name; + LOG(INFO) << "finished engine " << engine_name << " containing " + << s.subgraph_node_ids.size() << " nodes"; // Build the TRT op tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc index 6193f0b0a1..8fc4697c51 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.cc +++ b/tensorflow/contrib/tensorrt/segment/segment.cc @@ -80,13 +80,20 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, std::vector 
in_edges(dst->in_edges().begin(), dst->in_edges().end()); for (const tensorflow::Edge* in_edge : in_edges) { - if (in_edge->src() != src) { - tensorflow::Edge* e = const_cast(in_edge); - if (e->src() == graph->source_node()) { - graph->AddEdge(e->src(), e->src_output(), src, - tensorflow::Graph::kControlSlot); - } else { - graph->AddEdge(e->src(), e->src_output(), src, 0 /* input index */); + if (in_edge->IsControlEdge()) { + if (in_edge->src() != src) { + tensorflow::Edge* e = const_cast(in_edge); + graph->AddControlEdge(e->src(), src); + } + } else { + if (in_edge->src() != src) { + tensorflow::Edge* e = const_cast(in_edge); + if (e->src() == graph->source_node()) { + graph->AddEdge(e->src(), e->src_output(), src, + tensorflow::Graph::kControlSlot); + } else { + graph->AddEdge(e->src(), e->src_output(), src, 0 /* input index */); + } } } } @@ -94,12 +101,19 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, std::vector out_edges(dst->out_edges().begin(), dst->out_edges().end()); for (const tensorflow::Edge* out_edge : out_edges) { - tensorflow::Edge* e = const_cast(out_edge); - if (e->dst() == graph->sink_node()) { - graph->AddEdge(src, tensorflow::Graph::kControlSlot, e->dst(), - e->dst_input()); + if (out_edge->IsControlEdge()) { + tensorflow::Edge* e = const_cast(out_edge); + graph->AddControlEdge(src, e->dst()); } else { - graph->AddEdge(src, 0 /* output index */, e->dst(), e->dst_input()); + tensorflow::Edge* e = const_cast(out_edge); + if (e->dst() == graph->sink_node()) { + VLOG(1) << " edge to sink node " << src->name() << " -> " + << e->dst()->name(); + graph->AddEdge(src, tensorflow::Graph::kControlSlot, e->dst(), + e->dst_input()); + } else { + graph->AddEdge(src, 0 /* output index */, e->dst(), e->dst_input()); + } } } @@ -118,7 +132,7 @@ void ContractEdge(tensorflow::Edge* edge, tensorflow::Graph* graph, tensorflow::Status SegmentGraph( const tensorflow::GraphDef& gdef, - const std::function& candidate_fn, + const std::function& 
candidate_fn, const SegmentOptions& options, SegmentNodesVector* segments) { // Create a Graph representation of the GraphDef. tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), @@ -136,7 +150,7 @@ tensorflow::Status SegmentGraph( for (int i = 0; i < graph.num_node_ids(); ++i) { tensorflow::Node* node = graph.FindNodeId(i); if (options.exclude_node_list.count(node->name()) != 0 || - !candidate_fn(node->def())) { + !candidate_fn(node)) { node = nullptr; } node_segments.emplace_back(node); @@ -155,7 +169,7 @@ tensorflow::Status SegmentGraph( for (const tensorflow::Node* node : order) { // All output nodes of 'node' have been visited... - VLOG(2) << "Trying node " << node->name(); + VLOG(2) << "Trying node " << node->name() << " id=" << node->id(); // 'node' must be a TRT candidate... if (node_segments[node->id()].Value() == nullptr) { @@ -169,8 +183,12 @@ tensorflow::Status SegmentGraph( while (true) { std::set contract_edges; for (const tensorflow::Edge* out_edge : node->out_edges()) { - VLOG(2) << "... out node " << out_edge->dst()->name(); - + VLOG(2) << "... out node " << out_edge->dst()->name() << " ( " + << out_edge->dst()->id() << " <- " << node->id() << " )"; + if (out_edge->IsControlEdge()) { + VLOG(2) << "... ... Control Edge, Skipping"; + continue; + } // Out node must be TRT candidate... if (node_segments[out_edge->dst()->id()].Value() == nullptr) { VLOG(2) << "... ... not a TRT candidate"; @@ -196,7 +214,8 @@ tensorflow::Status SegmentGraph( const tensorflow::Node* src = contract_edge->src(); const tensorflow::Node* dst = contract_edge->dst(); - VLOG(2) << "Merge " << src->name() << " <- " << dst->name(); + VLOG(2) << "Merge " << src->name() << " <- " << dst->name() << " (" + << src->id() << " <- " << dst->id(); node_segments[src->id()].Merge(&node_segments[dst->id()]); // Contracting the edge leaves disconnected graph edges. 
diff --git a/tensorflow/contrib/tensorrt/segment/segment.h b/tensorflow/contrib/tensorrt/segment/segment.h index ee6e2b3ed2..7e8685f44a 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.h +++ b/tensorflow/contrib/tensorrt/segment/segment.h @@ -20,10 +20,12 @@ limitations under the License. #include #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { + namespace tensorrt { namespace segment { @@ -46,7 +48,7 @@ struct SegmentOptions { // @return the status. tensorflow::Status SegmentGraph( const tensorflow::GraphDef& gdef, - const std::function& candidate_fn, + const std::function& candidate_fn, const SegmentOptions& options, SegmentNodesVector* segments); } // namespace segment diff --git a/tensorflow/contrib/tensorrt/segment/segment_test.cc b/tensorflow/contrib/tensorrt/segment/segment_test.cc index 74cbc5f2b3..7ddabec268 100644 --- a/tensorflow/contrib/tensorrt/segment/segment_test.cc +++ b/tensorflow/contrib/tensorrt/segment/segment_test.cc @@ -35,7 +35,7 @@ class SegmentTest : public ::testing::Test { TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name); - std::function MakeCandidateFn( + std::function MakeCandidateFn( const std::set& node_names); protected: @@ -60,10 +60,10 @@ bool SegmentTest::GetGraphDef(TF_Graph* graph, return ret; } -std::function SegmentTest::MakeCandidateFn( +std::function SegmentTest::MakeCandidateFn( const std::set& node_names) { - return [node_names](const NodeDef& node) -> bool { - return node_names.find(node.name()) != node_names.end(); + return [node_names](const Node* node) -> bool { + return node_names.find(node->name()) != node_names.end(); }; } -- GitLab From ad61950fb5db57aa5a4089203a1a4bf48df8c5f4 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 23 Mar 2018 14:12:34 -0700 Subject: [PATCH 0057/1262] Instead of depending 
on ctest to be in PATH, directly reference the binary. (#17964) PiperOrigin-RevId: 190137278 --- tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat index b537192a94..97829892b1 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_py.bat @@ -28,6 +28,9 @@ IF DEFINED TF_NIGHTLY (ECHO TF_NIGHTLY is set to %TF_NIGHTLY%) ELSE (SET TF_NIGH :: Set pip binary location. Do not override if it is set already. IF DEFINED PIP_EXE (ECHO PIP_EXE is set to %PIP_EXE%) ELSE (SET PIP_EXE="C:\Program Files\Anaconda3\Scripts\pip.exe") +:: Set ctest binary location. +IF DEFINED CTEST_EXE (ECHO CTEST_EXE is set to %CTEST_EXE%) ELSE (SET CTEST_EXE="C:\Program Files\cmake\bin\ctest.exe") + :: Run the CMAKE build to build the pip package. CALL %REPO_ROOT%\tensorflow\tools\ci_build\windows\gpu\cmake\run_build.bat if %errorlevel% neq 0 exit /b %errorlevel% @@ -47,4 +50,4 @@ if %errorlevel% neq 0 exit /b %errorlevel% :: Run all python tests if the installation succeeded. echo Running tests... -ctest -C Release --output-on-failure --jobs 1 +%CTEST_EXE% -C Release --output-on-failure --jobs 1 -- GitLab From 3fbdba0c84941f34782a5e074b691916bca61a93 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 26 Mar 2018 11:49:03 -0700 Subject: [PATCH 0058/1262] update GPU installation instructions --- tensorflow/docs_src/install/install_linux.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 378946b459..3c5db9bced 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -33,7 +33,7 @@ must be installed on your system: * CUDA® Toolkit 9.0. 
For details, see [NVIDIA's documentation](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/#axzz4VZnqTJ2A). - Ensure that you append the relevant Cuda pathnames to the + Ensure that you append the relevant CUDA pathnames to the `LD_LIBRARY_PATH` environment variable as described in the NVIDIA documentation. * The NVIDIA drivers associated with CUDA Toolkit 9.0. @@ -56,7 +56,7 @@ must be installed on your system: and add its path to your `LD_LIBRARY_PATH` environment variable:
-    $ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64
+    $ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda/extras/CUPTI/lib64
     
For CUDA Toolkit <= 7.5 do: @@ -64,6 +64,16 @@ must be installed on your system:
     $ sudo apt-get install libcupti-dev
     
+ * **[OPTIONAL]** For optimized inferencing performance, you can also install + NVIDIA TensorRT 3.0. For details, see + [NVIDIA's TensorRT documentation](http://docs.nvidia.com/deeplearning/sdk/tensorrt-install-guide/index.html#installing-tar). + Only steps 1-4 in the TensorRT Tar File installation instructions are + required for compatibility with TensorFlow; the Python package installation + in steps 5 and 6 can be omitted. Detailed installation instructions can be found at [package documentation](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/tensorrt#installing-tensorrt-304) + + **IMPORTANT:** For compatibility with the pre-built `tensorflow-gpu` + package, please use the Ubuntu **14.04** tar file package of TensorRT + even when installing onto an Ubuntu 16.04 system. If you have an earlier version of the preceding packages, please upgrade to the specified versions. If upgrading is not possible, then you may still run -- GitLab From ea644ac0783537a6ac8a2c8a2432829b3db69aeb Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 26 Mar 2018 13:05:52 -0700 Subject: [PATCH 0059/1262] Disabling the state_management_test. For non-pip builds also.
--- tensorflow/contrib/timeseries/python/timeseries/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index 64f5cd8357..d72cc1b8a2 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -233,6 +233,7 @@ py_test( ], srcs_version = "PY2AND3", tags = [ + "manual", "no_pip", # b/64527635 "no_pip_gpu", # b/63391119 ], -- GitLab From 1fcef75aaa1989376324ff8dfc25033b443a69df Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Mon, 26 Mar 2018 13:48:00 -0700 Subject: [PATCH 0060/1262] Update BUILD --- tensorflow/contrib/timeseries/python/timeseries/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index d72cc1b8a2..67ee644d3b 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -233,7 +233,7 @@ py_test( ], srcs_version = "PY2AND3", tags = [ - "manual", + "no_oss", "no_pip", # b/64527635 "no_pip_gpu", # b/63391119 ], -- GitLab From 083cf6b91a380641933457a4301f9b1efa13af92 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Oct 2017 17:03:15 +0000 Subject: [PATCH 0061/1262] Add customized kernel implementation for clip_by_value This fix tries to address the issue raised in 7225 where `tf.clip_by_value` does not have a custom kernel and reused `tf.maximum` and `tf.minimum`. In case scalar values are passed to `tf.clip_by_value`, unnecessary memory usage might be incurred. This fix adds the customized kernel implementation for `tf.clip_by_value`. This fix fixes 7225.
Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_op_clip.cc | 150 +++++++++++++++++++++++ tensorflow/core/ops/math_ops.cc | 23 ++++ 2 files changed, 173 insertions(+) create mode 100644 tensorflow/core/kernels/cwise_op_clip.cc diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc new file mode 100644 index 0000000000..6ce062b08f --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -0,0 +1,150 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/kernels/cwise_ops_common.h" + +//#include "third_party/eigen3/Eigen/Core/CwiseTernaryOp.h" + +namespace tensorflow { + +// Unary functor for clip +template +struct UnaryClipOp { + UnaryClipOp(const T& value_min, const T& value_max) + : value_min_(value_min), value_max_(value_max) {} + const T operator()(const T& value) const { + return std::max(std::min(value, value_max_), value_min_); + } + T value_min_; + T value_max_; +}; + +// Binary functor for clip +template +struct BinaryClipMinOp { + BinaryClipMinOp(const T& value_min) : value_min_(value_min) {} + const T operator()(const T& value, const T& value_max) const { + return std::max(std::min(value, value_max), value_min_); + } + T value_min_; +}; + +// Binary functor for clip +template +struct BinaryClipMaxOp { + BinaryClipMaxOp(const T& value_max) : value_max_(value_max) {} + const T operator()(const T& value, const T& value_min) const { + return std::max(std::min(value, value_max_), value_min); + } + T value_max_; +}; + +// Basic coefficient-wise tenary operations. +// This is the case for example of the clip_by_value. +// Device: E.g., CPUDevice, GPUDevice. +// Functor: defined above. E.g., functor::clip. 
+template +class TenaryOp : public OpKernel { + public: + explicit TenaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& in0 = ctx->input(0); + const Tensor& in1 = ctx->input(1); + const Tensor& in2 = ctx->input(2); + + auto in0_flat = in0.flat(); + auto in1_flat = in1.flat(); + auto in2_flat = in2.flat(); + const Device& d = ctx->eigen_device(); + + Tensor* out = nullptr; + OP_REQUIRES_OK( + ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); + auto out_flat = out->flat(); + if (in1.shape() == in2.shape()) { + if (in0.shape() == in1.shape()) { + out_flat = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat); + } else { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. ", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); + out_flat = in0_flat.unaryExpr(UnaryClipOp(in1_flat(0), in2_flat(0))); + } + } else { + if (in0.shape() == in1.shape()) { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in2.shape()), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. ", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); + out_flat = + in0_flat.binaryExpr(in1_flat, BinaryClipMaxOp(in2_flat(0))); + + } else { + OP_REQUIRES(ctx, (in0.shape() == in2.shape() && + TensorShapeUtils::IsScalar(in1.shape())), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. 
", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); + out_flat = + in0_flat.binaryExpr(in2_flat, BinaryClipMinOp(in1_flat(0))); + } + } + } +}; + +#define REGISTER_CPU_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("ClipByValue").Device(DEVICE_CPU).TypeConstraint("T"), \ + TenaryOp); + +REGISTER_CPU_KERNEL(Eigen::half); +REGISTER_CPU_KERNEL(float); +REGISTER_CPU_KERNEL(double); +REGISTER_CPU_KERNEL(int8); +REGISTER_CPU_KERNEL(int16); +REGISTER_CPU_KERNEL(int32); +REGISTER_CPU_KERNEL(int64); +REGISTER_CPU_KERNEL(uint8); +REGISTER_CPU_KERNEL(uint16); + +#undef REGISTER_CPU_KERNEL + +#if GOOGLE_CUDA +// REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double); + +// A special GPU kernel for int32. +// TODO(b/25387198): Also enable int32 in device memory. This kernel +// registration requires all int32 inputs and outputs to be in host memory. +REGISTER_KERNEL_BUILDER(Name("ClipByValue") + .Device(DEVICE_GPU) + .HostMemory("t") + .HostMemory("clip_value_min") + .HostMemory("clip_value_min") + .TypeConstraint("T"), + TenaryOp); +#endif + +} // namespace tensorflow diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 8f33d51d5a..602a6ec115 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1558,6 +1558,29 @@ REGISTER_OP("Bucketize") .Attr("boundaries: list(float)") .SetShapeFn(shape_inference::UnchangedShape); +REGISTER_OP("ClipByValue") + .Input("t: T") + .Input("clip_value_min: T") + .Input("clip_value_max: T") + .Output("output: T") + .Attr("T: numbertype") + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Clips tensor values to a specified min and max. + +Given a tensor `t`, this operation returns a tensor of the same type and +shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`. 
+Any values less than `clip_value_min` are set to `clip_value_min`. Any values +greater than `clip_value_max` are set to `clip_value_max`. + +t: A `Tensor`. +clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape + as `t`. The minimum value to clip by. +clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape + as `t`. The maximum value to clip by. +output: A clipped `Tensor` with the same shape as input 't'. +)doc"); + #ifdef INTEL_MKL REGISTER_OP("_MklAddN") .Input("inputs: N * T") -- GitLab From daf0b206b5afde875a19270136ad22d9d2bb138c Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Oct 2017 17:08:32 +0000 Subject: [PATCH 0062/1262] Add python wrapper for tf.clip_by_value Signed-off-by: Yong Tang --- tensorflow/python/ops/clip_ops.py | 17 +- tensorflow/python/ops/hidden_ops.txt | 395 +++++++++++++++++++++++++++ 2 files changed, 400 insertions(+), 12 deletions(-) create mode 100644 tensorflow/python/ops/hidden_ops.txt diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py index 49f8c66531..a5baebb3f6 100644 --- a/tensorflow/python/ops/clip_ops.py +++ b/tensorflow/python/ops/clip_ops.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -58,18 +59,10 @@ def clip_by_value(t, clip_value_min, clip_value_max, """ with ops.name_scope(name, "clip_by_value", [t, clip_value_min, clip_value_max]) as name: - t = ops.convert_to_tensor(t, name="t") - - # Go through list of tensors, for each value in each tensor clip - t_min = math_ops.minimum(t, clip_value_max) - # Assert that the shape is compatible with the initial shape, - # to prevent unintentional 
broadcasting. - _ = t.shape.merge_with(t_min.shape) - - t_max = math_ops.maximum(t_min, clip_value_min, name=name) - _ = t.shape.merge_with(t_max.shape) - - return t_max + return gen_math_ops._clip_by_value(t, + clip_value_min, + clip_value_max, + name=name) @tf_export("clip_by_norm") diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt new file mode 100644 index 0000000000..e1217e984c --- /dev/null +++ b/tensorflow/python/ops/hidden_ops.txt @@ -0,0 +1,395 @@ +# array_ops +BatchToSpace +BroadcastArgs +BroadcastGradientArgs +ConcatOffset +Concat +ConcatV2 +ConjugateTranspose +Const +DebugGradientIdentity +DebugGradientRefIdentity +EditDistance +ExpandDims +ListDiff +MirrorPad +MirrorPadGrad +OneHot +Pack +Pad +PadV2 +ParallelConcat +Placeholder +RefIdentity +Reverse +Snapshot +SpaceToBatch +Split +SplitV +Squeeze +Slice +TileGrad # Exported through array_grad instead of array_ops. +ZerosLike # TODO(josh11b): Use this instead of the Python version. +Unique +UniqueV2 +UniqueWithCounts +UniqueWithCountsV2 +Unpack + +# candidate_sampling_ops +AllCandidateSampler +ComputeAccidentalHits +FixedUnigramCandidateSampler +LearnedUnigramCandidateSampler +LogUniformCandidateSampler +ThreadUnsafeUnigramCandidateSampler +UniformCandidateSampler + +# checkpoint_ops +GenerateVocabRemapping +LoadAndRemapMatrix + + +# control_flow_ops +Switch +Merge +RefMerge +Exit +RefExit + +# ctc_ops +CTCLoss +CTCGreedyDecoder +CTCBeamSearchDecoder + +# data_flow_ops +Barrier +BarrierClose +BarrierIncompleteSize +BarrierInsertMany +BarrierReadySize +BarrierTakeMany +DeleteSessionTensor +FakeQueue +FIFOQueue +FIFOQueueV2 +GetSessionHandle +GetSessionHandleV2 +GetSessionTensor +HashTable +HashTableV2 +InitializeTable +InitializeTableV2 +InitializeTableFromTextFile +InitializeTableFromTextFileV2 +LookupTableExport +LookupTableExportV2 +LookupTableFind +LookupTableFindV2 +LookupTableImport +LookupTableImportV2 +LookupTableInsert +LookupTableInsertV2 +LookupTableSize 
+LookupTableSizeV2 +MutableDenseHashTable +MutableDenseHashTableV2 +MutableHashTable +MutableHashTableV2 +MutableHashTableOfTensors +MutableHashTableOfTensorsV2 +Mutex +MutexAcquire +MutexRelease +PaddingFIFOQueue +PaddingFIFOQueueV2 +PriorityQueue +PriorityQueueV2 +QueueClose +QueueCloseV2 +QueueDequeue +QueueDequeueV2 +QueueDequeueMany +QueueDequeueManyV2 +QueueDequeueUpTo +QueueDequeueUpToV2 +QueueEnqueue +QueueEnqueueV2 +QueueEnqueueMany +QueueEnqueueManyV2 +QueueSize +QueueSizeV2 +RandomShuffleQueue +RandomShuffleQueueV2 +Stack +StackClose +StackPop +StackPush +StackV2 +StackCloseV2 +StackPopV2 +StackPushV2 +TensorArray +TensorArrayClose +TensorArrayCloseV2 +TensorArrayConcat +TensorArrayConcatV2 +TensorArrayGather +TensorArrayGatherV2 +TensorArrayGrad +TensorArrayGradV2 +TensorArrayPack +TensorArrayPackV2 +TensorArrayRead +TensorArrayReadV2 +TensorArrayScatter +TensorArrayScatterV2 +TensorArraySize +TensorArraySizeV2 +TensorArraySplit +TensorArraySplitV2 +TensorArrayUnpack +TensorArrayUnpackV2 +TensorArrayV2 +TensorArrayWrite +TensorArrayWriteV2 +TensorArrayV3 +TensorArrayCloseV3 +TensorArrayConcatV3 +TensorArrayGatherV3 +TensorArrayGradV3 +TensorArrayReadV3 +TensorArrayPackV3 +TensorArrayScatterV3 +TensorArraySizeV3 +TensorArraySplitV3 +TensorArrayUnpackV3 +TensorArrayWriteV3 + +# functional_ops +SymbolicGradient + +# image_ops +AdjustContrastv2 +NonMaxSuppression +NonMaxSuppressionV2 +RandomCrop +ResizeBilinearGrad +ResizeBicubicGrad +ResizeNearestNeighborGrad +SampleDistortedBoundingBox +SampleDistortedBoundingBoxV2 +ScaleImageGrad + +# io_ops +FixedLengthRecordReader +IdentityReader +ReaderNumRecordsProduced +ReaderNumWorkUnitsCompleted +ReaderRead +ReaderReadUpTo +ReaderReset +ReaderRestoreState +ReaderSerializeState +ReaderWorkQueueLength +FixedLengthRecordReaderV2 +IdentityReaderV2 +ReaderNumRecordsProducedV2 +ReaderNumWorkUnitsCompletedV2 +ReaderReadV2 +ReaderReadUpToV2 +ReaderResetV2 +ReaderRestoreStateV2 +ReaderSerializeStateV2 
+ReaderWorkQueueLengthV2 +Restore +RestoreSlice +Save +SaveSlices +ShardedFilename +ShardedFilespec +TextLineReader +TFRecordReader +WholeFileReader +TextLineReaderV2 +TFRecordReaderV2 +WholeFileReaderV2 +LMDBReader +DecodeCSV + +# linalg_ops +BatchCholesky +BatchCholeskyGrad +BatchMatrixDeterminant +BatchMatrixInverse +BatchMatrixSolve +BatchMatrixSolveLs +BatchMatrixTriangularSolve +BatchSelfAdjointEig +BatchSelfAdjointEigV2 +BatchSvd +LogMatrixDeterminant +MatrixExponential +MatrixLogarithm +MatrixSolveLs +SelfAdjointEig +SelfAdjointEigV2 +Svd + +# logging_ops +Assert +AudioSummary +AudioSummaryV2 +HistogramSummary +ImageSummary +MergeSummary +Print +ScalarSummary +TensorSummary +TensorSummaryV2 + +# math_ops +Abs +AccumulateNV2 +AddN +AddV2 +All +Any +BatchMatMul +BatchFFT +BatchFFT2D +BatchFFT3D +BatchIFFT +BatchIFFT2D +BatchIFFT3D +Bucketize +ClipByValue +Complex +ComplexAbs +Conj +FloorDiv +FloorMod +HistogramFixedWidth +Max +Mean +Min +Mul +Neg +Pow +Prod +Range +RealDiv +Select +SparseMatMul +Sub +Sum +MatMul +Sigmoid +Tanh +SigmoidGrad +TanhGrad +InvGrad +ReciprocalGrad +SqrtGrad +RsqrtGrad +TruncateDiv +TruncateMod + +# nn_ops +AvgPoolGrad # "*Grad" accessible through nn_grad instead of nn_ops. 
+AvgPool3DGrad +BatchNormWithGlobalNormalization +BatchNormWithGlobalNormalizationGrad +FusedBatchNorm +FusedBatchNormV2 +SoftmaxCrossEntropyWithLogits +SparseSoftmaxCrossEntropyWithLogits +LRNGrad +MaxPoolGrad +MaxPoolGradWithArgmax +MaxPoolGradGrad +MaxPoolGradGradWithArgmax +MaxPool3DGrad +MaxPool3DGradGrad +ReluGrad +Relu6Grad +EluGrad +SeluGrad +SoftplusGrad +SoftsignGrad +TopK +TopKV2 +BiasAdd +BiasAddV1 +Relu6 +AvgPool +MaxPool +MaxPoolV2 +Softmax +LogSoftmax +FractionalAvgPoolGrad +FractionalMaxPoolGrad +InTopK +InTopKV2 + +# parsing_ops +ParseExample +ParseSingleSequenceExample + +# random_ops +RandomGamma +RandomPoisson +RandomUniform +RandomUniformInt +RandomShuffle +RandomStandardNormal +ParameterizedTruncatedNormal +TruncatedNormal + +# script_ops +PyFunc +PyFuncStateless +EagerPyFunc + +# sdca_ops + +# state_ops +Variable +VariableV2 +TemporaryVariable +DestroyTemporaryVariable + +# sparse_ops +AddSparseToTensorsMap +AddManySparseToTensorsMap +TakeManySparseFromTensorsMap +DeserializeManySparse +DeserializeSparse +SerializeManySparse +SerializeSparse +SparseAdd +SparseAddGrad +SparseConcat +SparseCross +SparseFillEmptyRows +SparseFillEmptyRowsGrad +SparseSplit +SparseSelectLastK +SparseReorder +SparseReshape +SparseToDense +SparseTensorDenseAdd +SparseTensorDenseMatMul + +# string_ops +StringSplit + +# user_ops +Fact + +# training_ops +# (None) + +# word2vec deprecated ops +NegTrain +Skipgram -- GitLab From 90a271e7a37574fc1c90fd6042c3b3972645d114 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Oct 2017 17:09:05 +0000 Subject: [PATCH 0063/1262] Update tests for `tf.clip_by_value` Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/clip_ops_test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index 5c8b71da17..d47930350e 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ 
b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import clip_ops from tensorflow.python.platform import test @@ -42,10 +43,12 @@ class ClipTest(test.TestCase): x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1]) # Use a nonsensical shape. clip = constant_op.constant([1.0, 2.0]) - with self.assertRaises(ValueError): - _ = clip_ops.clip_by_value(x, -clip, clip) - with self.assertRaises(ValueError): - _ = clip_ops.clip_by_value(x, 1.0, clip) + with self.assertRaises(errors_impl.InvalidArgumentError): + ans = clip_ops.clip_by_value(x, -clip, clip) + tf_ans = ans.eval() + with self.assertRaises(errors_impl.InvalidArgumentError): + ans = clip_ops.clip_by_value(x, 1.0, clip) + tf_ans = ans.eval() def testClipByValueNonFinite(self): with self.test_session(): -- GitLab From cff8abcb1a9305491637dc44559316aa1d8184e6 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 26 Oct 2017 04:37:55 +0000 Subject: [PATCH 0064/1262] Add GPU kernel for tf.clip_by_value Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_op_clip.cc | 162 +++++++++++++----- tensorflow/core/kernels/cwise_op_clip.h | 61 +++++++ .../core/kernels/cwise_op_clip_gpu.cu.cc | 134 +++++++++++++++ 3 files changed, 313 insertions(+), 44 deletions(-) create mode 100644 tensorflow/core/kernels/cwise_op_clip.h create mode 100644 tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index 6ce062b08f..c2980acdd8 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -13,43 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/cwise_ops_common.h" - -//#include "third_party/eigen3/Eigen/Core/CwiseTernaryOp.h" +#include "tensorflow/core/kernels/cwise_op_clip.h" namespace tensorflow { -// Unary functor for clip -template -struct UnaryClipOp { - UnaryClipOp(const T& value_min, const T& value_max) - : value_min_(value_min), value_max_(value_max) {} - const T operator()(const T& value) const { - return std::max(std::min(value, value_max_), value_min_); - } - T value_min_; - T value_max_; -}; - -// Binary functor for clip -template -struct BinaryClipMinOp { - BinaryClipMinOp(const T& value_min) : value_min_(value_min) {} - const T operator()(const T& value, const T& value_max) const { - return std::max(std::min(value, value_max), value_min_); - } - T value_min_; -}; - -// Binary functor for clip -template -struct BinaryClipMaxOp { - BinaryClipMaxOp(const T& value_max) : value_max_(value_max) {} - const T operator()(const T& value, const T& value_min) const { - return std::max(std::min(value, value_max_), value_min); - } - T value_max_; -}; +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; // Basic coefficient-wise tenary operations. // This is the case for example of the clip_by_value. 
@@ -76,7 +45,8 @@ class TenaryOp : public OpKernel { auto out_flat = out->flat(); if (in1.shape() == in2.shape()) { if (in0.shape() == in1.shape()) { - out_flat = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat); + functor::TernaryClipOp()(d, in0_flat, in1_flat, in2_flat, + out_flat); } else { OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()), errors::InvalidArgument( @@ -85,7 +55,8 @@ class TenaryOp : public OpKernel { "input shape: ", in0.shape().DebugString(), "clip_value_min shape: ", in1.shape().DebugString(), "clip_value_max shape: ", in2.shape().DebugString())); - out_flat = in0_flat.unaryExpr(UnaryClipOp(in1_flat(0), in2_flat(0))); + functor::UnaryClipOp()(d, in0_flat, in1_flat, in2_flat, + out_flat); } } else { if (in0.shape() == in1.shape()) { @@ -96,9 +67,8 @@ class TenaryOp : public OpKernel { "input shape: ", in0.shape().DebugString(), "clip_value_min shape: ", in1.shape().DebugString(), "clip_value_max shape: ", in2.shape().DebugString())); - out_flat = - in0_flat.binaryExpr(in1_flat, BinaryClipMaxOp(in2_flat(0))); - + functor::BinaryLeftClipOp()(d, in0_flat, in1_flat, in2_flat, + out_flat); } else { OP_REQUIRES(ctx, (in0.shape() == in2.shape() && TensorShapeUtils::IsScalar(in1.shape())), @@ -108,13 +78,103 @@ class TenaryOp : public OpKernel { "input shape: ", in0.shape().DebugString(), "clip_value_min shape: ", in1.shape().DebugString(), "clip_value_max shape: ", in2.shape().DebugString())); - out_flat = - in0_flat.binaryExpr(in2_flat, BinaryClipMinOp(in1_flat(0))); + functor::BinaryRightClipOp()(d, in0_flat, in1_flat, in2_flat, + out_flat); } } } }; +namespace functor { +// Unary functor for clip [Tensor, Scalar, Scalar] +template +struct UnaryClipFunc { + UnaryClipFunc(const T& value_min, const T& value_max) + : value_min_(value_min), value_max_(value_max) {} + const T operator()(const T& value) const { + return std::max(std::min(value, value_max_), value_min_); + } + T value_min_; + T value_max_; +}; +template +struct UnaryClipOp { + void 
operator()(const CPUDevice& d, typename TTypes::ConstFlat& in0_flat, + typename TTypes::ConstFlat& in1_flat, + typename TTypes::ConstFlat& in2_flat, + typename TTypes::Flat& out_flat) const { + out_flat = in0_flat.unaryExpr(UnaryClipFunc(in1_flat(0), in2_flat(0))); + } +}; + +// Binary functor for clip [Tensor, Scalar, Tensor] +template +struct BinaryRightClipFunc { + BinaryRightClipFunc(const T& value_min) : value_min_(value_min) {} + const T operator()(const T& value, const T& value_max) const { + return std::max(std::min(value, value_max), value_min_); + } + T value_min_; +}; +template +struct BinaryRightClipOp { + void operator()(const CPUDevice& d, typename TTypes::ConstFlat& in0_flat, + typename TTypes::ConstFlat& in1_flat, + typename TTypes::ConstFlat& in2_flat, + typename TTypes::Flat& out_flat) const { + out_flat = + in0_flat.binaryExpr(in2_flat, BinaryRightClipFunc(in1_flat(0))); + } +}; + +// Binary functor for clip [Tensor, Tensor, Scalar] +template +struct BinaryLeftClipFunc { + BinaryLeftClipFunc(const T& value_max) : value_max_(value_max) {} + const T operator()(const T& value, const T& value_min) const { + return std::max(std::min(value, value_max_), value_min); + } + T value_max_; +}; +template +struct BinaryLeftClipOp { + void operator()(const CPUDevice& d, typename TTypes::ConstFlat& in0_flat, + typename TTypes::ConstFlat& in1_flat, + typename TTypes::ConstFlat& in2_flat, + typename TTypes::Flat& out_flat) const { + out_flat = + in0_flat.binaryExpr(in1_flat, BinaryLeftClipFunc(in2_flat(0))); + } +}; + +// Ternary functor for clip [Tensor, Tensor, Tensor] +template +struct TernaryClipOp { + void operator()(const CPUDevice& d, typename TTypes::ConstFlat& in0_flat, + typename TTypes::ConstFlat& in1_flat, + typename TTypes::ConstFlat& in2_flat, + typename TTypes::Flat& out_flat) const { + out_flat.device(d) = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat); + } +}; + +#define INSTANTIATE_CPU(T) \ + template struct UnaryClipOp; \ + template struct 
BinaryRightClipOp; \ + template struct BinaryLeftClipOp; \ + template struct TernaryClipOp; +INSTANTIATE_CPU(Eigen::half); +INSTANTIATE_CPU(float); +INSTANTIATE_CPU(double); +INSTANTIATE_CPU(int8); +INSTANTIATE_CPU(int16); +INSTANTIATE_CPU(int32); +INSTANTIATE_CPU(int64); +INSTANTIATE_CPU(uint8); +INSTANTIATE_CPU(uint16); +#undef INSTANTIATE_CPU +} // namespace functor + #define REGISTER_CPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ Name("ClipByValue").Device(DEVICE_CPU).TypeConstraint("T"), \ @@ -129,11 +189,22 @@ REGISTER_CPU_KERNEL(int32); REGISTER_CPU_KERNEL(int64); REGISTER_CPU_KERNEL(uint8); REGISTER_CPU_KERNEL(uint16); - #undef REGISTER_CPU_KERNEL #if GOOGLE_CUDA -// REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double); + +#define REGISTER_GPU_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("ClipByValue").Device(DEVICE_GPU).TypeConstraint("T"), \ + TenaryOp); +REGISTER_GPU_KERNEL(Eigen::half); +REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(double); +REGISTER_GPU_KERNEL(int8); +REGISTER_GPU_KERNEL(int16); +REGISTER_GPU_KERNEL(int64); +REGISTER_GPU_KERNEL(uint8); +REGISTER_GPU_KERNEL(uint16); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel @@ -142,9 +213,12 @@ REGISTER_KERNEL_BUILDER(Name("ClipByValue") .Device(DEVICE_GPU) .HostMemory("t") .HostMemory("clip_value_min") - .HostMemory("clip_value_min") + .HostMemory("clip_value_max") + .HostMemory("output") .TypeConstraint("T"), TenaryOp); + +#undef REGISTER_GPU_KERNEL #endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_clip.h b/tensorflow/core/kernels/cwise_op_clip.h new file mode 100644 index 0000000000..1a4bf8cf1d --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_clip.h @@ -0,0 +1,61 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_ +#define TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_ + +#include "tensorflow/core/kernels/cwise_ops_common.h" + +namespace tensorflow { +namespace functor { +// Unary functor for clip [Tensor, Scalar, Scalar] +template +struct UnaryClipOp { + void operator()(const Device &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const; +}; + +// Binary functor for clip [Tensor, Scalar, Tensor] +template +struct BinaryRightClipOp { + void operator()(const Device &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const; +}; + +// Binary functor for clip [Tensor, Tensor, Scalar] +template +struct BinaryLeftClipOp { + void operator()(const Device &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const; +}; + +// Ternary functor for clip [Tensor, Tensor, Tensor] +template +struct TernaryClipOp { + void operator()(const Device &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const; +}; +} +} // namespace tensorflow + +#endif // TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_ diff --git a/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc 
b/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc new file mode 100644 index 0000000000..5c07847548 --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc @@ -0,0 +1,134 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "tensorflow/core/kernels/cwise_op_clip.h" +#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +template +__global__ void UnaryClipCustomKernel(const int32 size_in, const T *in0, + const T *in1, const T *in2, T *out) { + CUDA_1D_KERNEL_LOOP(i, size_in) { + T value = in2[0] < in0[i] ? in2[0] : in0[i]; + out[i] = value < in1[0] ? in1[0] : value; + } +} + +template +__global__ void BinaryRightClipCustomKernel(const int32 size_in, const T *in0, + const T *in1, const T *in2, + T *out) { + CUDA_1D_KERNEL_LOOP(i, size_in) { + T value = in2[i] < in0[i] ? in2[i] : in0[i]; + out[i] = value < in1[0] ? in1[0] : value; + } +} + +template +__global__ void BinaryLeftClipCustomKernel(const int32 size_in, const T *in0, + const T *in1, const T *in2, T *out) { + CUDA_1D_KERNEL_LOOP(i, size_in) { + T value = in2[0] < in0[i] ? in2[0] : in0[i]; + out[i] = value < in1[i] ? 
in1[i] : value; + } +} + +namespace functor { + +// Unary functor for clip [Tensor, Scalar, Scalar] +template +struct UnaryClipOp { + void operator()(const GPUDevice &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const { + CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d); + + UnaryClipCustomKernel< + T><<>>( + in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(), + out_flat.data()); + } +}; + +// Binary functor for clip [Tensor, Scalar, Tensor] +template +struct BinaryRightClipOp { + void operator()(const GPUDevice &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const { + CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d); + + BinaryRightClipCustomKernel< + T><<>>( + in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(), + out_flat.data()); + } +}; + +// Binary functor for clip [Tensor, Tensor, Scalar] +template +struct BinaryLeftClipOp { + void operator()(const GPUDevice &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const { + CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d); + + BinaryLeftClipCustomKernel< + T><<>>( + in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(), + out_flat.data()); + } +}; + +// Ternary functor for clip [Tensor, Tensor, Tensor] +template +struct TernaryClipOp { + void operator()(const GPUDevice &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const { + out_flat.device(d) = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat); + } +}; + +#define INSTANTIATE_GPU(T) \ + template struct UnaryClipOp; \ + template struct 
BinaryRightClipOp; \ + template struct BinaryLeftClipOp; \ + template struct TernaryClipOp; +INSTANTIATE_GPU(Eigen::half); +INSTANTIATE_GPU(float); +INSTANTIATE_GPU(double); +INSTANTIATE_GPU(int8); +INSTANTIATE_GPU(int16); +INSTANTIATE_GPU(int32); +INSTANTIATE_GPU(int64); +INSTANTIATE_GPU(uint8); +INSTANTIATE_GPU(uint16); +#undef INSTANTIATE_GPU + +} // namespace functor +} // namespace tensorflow + +#endif // GOOGLE_CUDA -- GitLab From a3553d45b63fba1cd4eb8d1d5b6dd0d565c94879 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 26 Oct 2017 04:38:38 +0000 Subject: [PATCH 0065/1262] Update test cases for tf.clip_by_value Signed-off-by: Yong Tang --- .../python/kernel_tests/clip_ops_test.py | 105 ++++++++++++++---- 1 file changed, 85 insertions(+), 20 deletions(-) diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index d47930350e..2d03fb99e4 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import clip_ops @@ -29,7 +30,7 @@ class ClipTest(test.TestCase): # ClipByValue test def testClipByValue(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3]) np_ans = [[-4.4, 2.0, 3.0], [4.0, 4.4, 4.4]] clip_value = 4.4 @@ -38,8 +39,72 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans) + # [Tensor, Scalar, Scalar] + def testClipByValue0Type(self): + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, + dtypes.uint8, dtypes.uint16]: + with self.test_session(use_gpu=True): + x = 
constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[2, 2, 3], [4, 4, 4]] + clip_value_min = 2 + clip_value_max = 4 + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + + # [Tensor, Tensor, Scalar] + def testClipByValue1Type(self): + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, + dtypes.uint8, dtypes.uint16]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[2, 2, 3], [4, 4, 4]] + clip_value_min = constant_op.constant([2, 2, 2, 3, 3, 3], shape=[2, 3], + dtype=dtype) + clip_value_max = 4 + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + + # [Tensor, Scalar, Tensor] + def testClipByValue2Type(self): + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, + dtypes.uint8, dtypes.uint16]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[4, 4, 4], [4, 5, 6]] + clip_value_min = 4 + clip_value_max = constant_op.constant([6, 6, 6, 6, 6, 6], shape=[2, 3], + dtype=dtype) + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + + # [Tensor, Tensor, Tensor] + def testClipByValue3Type(self): + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, + dtypes.uint8, dtypes.uint16]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[2, 2, 3], [5, 5, 6]] + clip_value_min = constant_op.constant([2, 2, 2, 5, 5, 5], shape=[2, 3], + dtype=dtype) + clip_value_max = constant_op.constant([5, 5, 5, 7, 7, 7], 
shape=[2, 3], + dtype=dtype) + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + def testClipByValueBadShape(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1]) # Use a nonsensical shape. clip = constant_op.constant([1.0, 2.0]) @@ -51,7 +116,7 @@ class ClipTest(test.TestCase): tf_ans = ans.eval() def testClipByValueNonFinite(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([float('NaN'), float('Inf'), -float('Inf')]) np_ans = [float('NaN'), 4.0, -4.0] clip_value = 4.0 @@ -63,7 +128,7 @@ class ClipTest(test.TestCase): # ClipByNorm tests def testClipByNormClipped(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Norm of x = sqrt(3^2 + 4^2) = 5 np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]] @@ -79,7 +144,7 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans_tensor) def testClipByNormBadShape(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3, 1]) # Use a nonsensical shape. 
clip = constant_op.constant([1.0, 2.0]) @@ -88,7 +153,7 @@ class ClipTest(test.TestCase): def testClipByNormNotClipped(self): # No norm clipping when clip_norm >= 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Norm of x = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]] @@ -100,7 +165,7 @@ class ClipTest(test.TestCase): def testClipByNormZero(self): # No norm clipping when norm = 0 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) # Norm = 0, no changes np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] @@ -112,7 +177,7 @@ class ClipTest(test.TestCase): def testClipByNormClippedWithDim0(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[:, 0] = sqrt(3^2 + 4^2) = 5, x[:, 2] = 3 np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 3.0]] @@ -124,7 +189,7 @@ class ClipTest(test.TestCase): def testClipByNormClippedWithDim1(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [3.2, 0.0, 2.4]] @@ -136,7 +201,7 @@ class ClipTest(test.TestCase): def testClipByNormNotClippedWithAxes(self): # No norm clipping when clip_norm >= 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 3.0]] @@ -149,7 +214,7 @@ class ClipTest(test.TestCase): # ClipByGlobalNorm tests def testClipByGlobalNormClipped(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with 
self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -170,7 +235,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormClippedTensor(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -191,7 +256,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormSupportsNone(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -214,7 +279,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormWithIndexedSlicesClipped(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = ops.IndexedSlices( constant_op.constant([1.0, -2.0]), constant_op.constant([3, 4])) @@ -247,7 +312,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormNotClipped(self): # No norm clipping when clip_norm >= 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -266,7 +331,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormZero(self): # No norm clipping when norm = 0 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([0.0, 0.0]) # Norm = 0, no 
changes @@ -285,7 +350,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormClipped(self): # Norm clipping when average clip_norm < 0.83333333 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333 np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]] @@ -297,7 +362,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormClippedTensor(self): # Norm clipping when average clip_norm < 0.83333333 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333 np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]] @@ -309,7 +374,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormNotClipped(self): # No norm clipping when average clip_norm >= 0.83333333 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]] @@ -321,7 +386,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormZero(self): # No norm clipping when average clip_norm = 0 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) # Average norm = 0, no changes np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] -- GitLab From a5e9d9a387680b0b1d7d8ed08fc9c07477a7efe7 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 30 Oct 2017 23:42:08 +0000 Subject: [PATCH 0066/1262] Add grad registration for clip_by_value and address review feedbacks. 
Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_op_clip.cc | 2 +- .../python/kernel_tests/clip_ops_test.py | 16 ++++++++++++ tensorflow/python/ops/clip_ops.py | 25 +++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index c2980acdd8..f30c49fdf8 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index 2d03fb99e4..cb1359be15 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -23,11 +23,27 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import gradient_checker from tensorflow.python.platform import test class ClipTest(test.TestCase): + def testClipByValueGradient(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) + outputs_1 = clip_ops.clip_by_value(inputs, 0.5, 3.5) + min_val = constant_op.constant([0.5, 0.5, 0.5, 0.5], dtype=dtypes.float32) + max_val = constant_op.constant([3.5, 3.5, 3.5, 3.5], dtype=dtypes.float32) + outputs_2 = clip_ops.clip_by_value(inputs, min_val, max_val) + with self.test_session(): + error_1 = gradient_checker.compute_gradient_error(inputs, [4], + outputs_1, [4]) + self.assertLess(error_1, 1e-4) + + error_2 = gradient_checker.compute_gradient_error(inputs, [4], + outputs_2, [4]) + self.assertLess(error_2, 1e-4) + # ClipByValue test 
def testClipByValue(self): with self.test_session(use_gpu=True): diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py index a5baebb3f6..e84cfc6944 100644 --- a/tensorflow/python/ops/clip_ops.py +++ b/tensorflow/python/ops/clip_ops.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops @@ -64,6 +65,30 @@ def clip_by_value(t, clip_value_min, clip_value_max, clip_value_max, name=name) +@ops.RegisterGradient("ClipByValue") +def _ClipByValueGrad(op, grad): + """Returns grad of clip_by_value.""" + x = op.inputs[0] + y = op.inputs[1] + z = op.inputs[2] + gdtype = grad.dtype + sx = array_ops.shape(x) + sy = array_ops.shape(y) + sz = array_ops.shape(z) + gradshape = array_ops.shape(grad) + zeros = array_ops.zeros(gradshape, gdtype) + xymask = math_ops.less(x, y) + xzmask = math_ops.greater(x, z) + rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + rx, rz = gen_array_ops._broadcast_gradient_args(sx, sz) + xgrad = array_ops.where(math_ops.logical_or(xymask, xzmask), zeros, grad) + ygrad = array_ops.where(xymask, grad, zeros) + zgrad = array_ops.where(xzmask, grad, zeros) + gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx) + gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy) + gz = array_ops.reshape(math_ops.reduce_sum(zgrad, rz), sz) + return (gx, gy, gz) + @tf_export("clip_by_norm") def clip_by_norm(t, clip_norm, axes=None, name=None): -- GitLab From 71ddf90d3c8c49d4401c0d298bf63b92150dadaa Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 14 Dec 2017 04:06:58 +0000 Subject: [PATCH 0067/1262] Update with `TenaryOp` -> `ClipOp` Signed-off-by: Yong Tang --- 
tensorflow/core/kernels/cwise_op_clip.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index f30c49fdf8..bd22f5777c 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -25,9 +25,9 @@ typedef Eigen::GpuDevice GPUDevice; // Device: E.g., CPUDevice, GPUDevice. // Functor: defined above. E.g., functor::clip. template -class TenaryOp : public OpKernel { +class ClipOp : public OpKernel { public: - explicit TenaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + explicit ClipOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { const Tensor& in0 = ctx->input(0); @@ -178,7 +178,7 @@ INSTANTIATE_CPU(uint16); #define REGISTER_CPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ Name("ClipByValue").Device(DEVICE_CPU).TypeConstraint("T"), \ - TenaryOp); + ClipOp); REGISTER_CPU_KERNEL(Eigen::half); REGISTER_CPU_KERNEL(float); @@ -196,7 +196,7 @@ REGISTER_CPU_KERNEL(uint16); #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ Name("ClipByValue").Device(DEVICE_GPU).TypeConstraint("T"), \ - TenaryOp); + ClipOp); REGISTER_GPU_KERNEL(Eigen::half); REGISTER_GPU_KERNEL(float); REGISTER_GPU_KERNEL(double); @@ -216,7 +216,7 @@ REGISTER_KERNEL_BUILDER(Name("ClipByValue") .HostMemory("clip_value_max") .HostMemory("output") .TypeConstraint("T"), - TenaryOp); + ClipOp); #undef REGISTER_GPU_KERNEL #endif -- GitLab From d1078b562532e2de60bc16fc544a94823149ae77 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 18 Dec 2017 17:42:37 +0000 Subject: [PATCH 0068/1262] Fix failing test //tensorflow/python:function_test Signed-off-by: Yong Tang --- tensorflow/python/framework/function_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 65ca801cbe..24aaff3748 100644 --- 
a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -1333,7 +1333,7 @@ class UnrollLSTMTest(test.TestCase): value=math_ops.matmul(xm, weights), num_or_size_splits=4, axis=1) new_c = math_ops.sigmoid(f_g) * cprev + math_ops.sigmoid( i_g) * math_ops.tanh(i_i) - new_c = clip_ops.clip_by_value(new_c, -50.0, 50.0) + new_c = math_ops.maximum(math_ops.minimum(new_c, 50.0), -50.0) new_m = math_ops.sigmoid(o_g) * math_ops.tanh(new_c) return new_m, new_c -- GitLab From 14e9c14ecdb9e9ddb283c5ec9cf27b3c5dbb900e Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 18 Dec 2017 18:58:42 +0000 Subject: [PATCH 0069/1262] Fix api_compatibility_test with `--update_goldens True` Signed-off-by: Yong Tang --- .../base_api/api_def_ClipByValue.pbtxt | 36 +++++++++++++++++++ .../python_api/api_def_ClipByValue.pbtxt | 4 +++ 2 files changed, 40 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ClipByValue.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt b/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt new file mode 100644 index 0000000000..803d8970ab --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt @@ -0,0 +1,36 @@ +op { + graph_op_name: "ClipByValue" + in_arg { + name: "t" + description: <