From 926259c411c1022812ffb7fe88ca61f0180bd778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 09:51:09 +0800 Subject: [PATCH 0001/1931] TST: test case for string --- tensorflow/python/kernel_tests/scatter_nd_ops_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 9f57949515..83d69c651a 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -364,6 +364,16 @@ class ScatterNdTest(test.TestCase): del input_ # input_ is not used in scatter_nd return array_ops.scatter_nd(indices, updates, shape) + def testString(self): + indices = constant_op.constant([[4], [3], [1], [7]], dtype=dtypes.int32) + updates = constant_op.constant(["four", "three", "one", "seven"], dtype=dtypes.string) + expected = np.array(["", "one", "", "three", "four", "", "", "seven"]) + scatter = self.scatter_nd(indices, updates, shape=(8,)) + + with self.test_session() as sess: + result = sess.run(scatter) + self.assertTrue(np.array_equal(result, expected)) + def testRank3ValidShape(self): indices = array_ops.zeros([2, 2, 2], dtypes.int32) updates = array_ops.zeros([2, 2, 2], dtypes.int32) -- GitLab From 005840c6e2d2a4c25ecd293162a38a79dedf1a4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 10:06:44 +0800 Subject: [PATCH 0002/1931] ENH: supports string for cpu --- tensorflow/core/kernels/scatter_nd_op.cc | 1 + tensorflow/core/kernels/scatter_nd_op_cpu_impl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 3a95dd1773..0caa7bd317 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -241,6 +241,7 @@ class ScatterNdUpdateOp : 
public OpKernel { TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_ADD_SUB_CPU); TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_UPDATE_CPU); TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_CPU); +TF_CALL_string(REGISTER_SCATTER_ND_CPU); // Registers GPU kernels. #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h index cffc326174..155d354d85 100644 --- a/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h +++ b/tensorflow/core/kernels/scatter_nd_op_cpu_impl.h @@ -160,6 +160,7 @@ struct ScatterNdFunctor { REGISTER_SCATTER_ND_INDEX(type, scatter_nd_op::UpdateOp::SUB); TF_CALL_ALL_TYPES(REGISTER_SCATTER_ND_UPDATE); +REGISTER_SCATTER_ND_INDEX(string, scatter_nd_op::UpdateOp::ADD); TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ND_MATH) #undef REGISTER_SCATTER_ND_MATH -- GitLab From d887d2bcfc819034b17e812a9a60460e2d61e447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 12:14:40 +0800 Subject: [PATCH 0003/1931] TST: ignore NonAliasingAdd --- tensorflow/python/kernel_tests/scatter_nd_ops_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 83d69c651a..03b2f892c6 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -594,6 +594,10 @@ class ScatterNdNonAliasingAddTest(ScatterNdTest): shape, dtype=updates.dtype)) return array_ops.scatter_nd_non_aliasing_add(input_, indices, updates) + def testString(self): + # Not supported yet. 
+ pass + if __name__ == "__main__": test.main() -- GitLab From 4b697e0d9472215c706bdb36bb72986cdce78edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 14 Dec 2017 13:51:34 +0800 Subject: [PATCH 0004/1931] DOC: modify document --- tensorflow/core/ops/array_ops.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 5a31f433ce..933ebe6b63 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -5332,12 +5332,13 @@ REGISTER_OP("ScatterNd") .Attr("Tindices: {int32, int64}") .SetShapeFn(ScatterNdShape) .Doc(R"doc( -Scatter `updates` into a new (initially zero) tensor according to `indices`. +Scatter `updates` into a new (initially zero for numeric, empty for string) +tensor according to `indices`. -Creates a new tensor by applying sparse `updates` to individual -values or slices within a zero tensor of the given `shape` according to -indices. This operator is the inverse of the @{tf.gather_nd} operator which -extracts values or slices from a given tensor. +Creates a new tensor by applying sparse `updates` to individual values or +slices within a zero (or empty string) tensor of the given `shape` +according to indices. This operator is the inverse of the @{tf.gather_nd} +operator which extracts values or slices from a given tensor. **WARNING**: The order in which updates are applied is nondeterministic, so the output will be nondeterministic if `indices` contains duplicates. 
-- GitLab From 597403e03680d69b72dbfa669f7bbdc77ce21ec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 20 Dec 2017 16:34:48 +0800 Subject: [PATCH 0005/1931] CLN: conform docstring --- tensorflow/core/ops/array_ops.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 933ebe6b63..89b6eb7162 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -5332,13 +5332,12 @@ REGISTER_OP("ScatterNd") .Attr("Tindices: {int32, int64}") .SetShapeFn(ScatterNdShape) .Doc(R"doc( -Scatter `updates` into a new (initially zero for numeric, empty for string) -tensor according to `indices`. +Scatter `updates` into a new empty tensor according to `indices`. Creates a new tensor by applying sparse `updates` to individual values or -slices within a zero (or empty string) tensor of the given `shape` -according to indices. This operator is the inverse of the @{tf.gather_nd} -operator which extracts values or slices from a given tensor. +slices within a tensor (initially zero for numeric, empty for string) of +the given `shape` according to indices. This operator is the inverse of the +@{tf.gather_nd} operator which extracts values or slices from a given tensor. **WARNING**: The order in which updates are applied is nondeterministic, so the output will be nondeterministic if `indices` contains duplicates. 
-- GitLab From e2a0db74cfa4ed73692ec5d0af944660bb4b688c Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Tue, 6 Feb 2018 17:52:07 -0800 Subject: [PATCH 0006/1931] Python3 support of docs generation --- tensorflow/docs_src/community/documentation.md | 18 +++--------------- tensorflow/tools/docs/BUILD | 2 +- tensorflow/tools/docs/build_docs_test.py | 4 ---- tensorflow/tools/docs/generate_lib.py | 2 -- tensorflow/tools/docs/generate_lib_test.py | 3 --- tensorflow/tools/docs/parser.py | 4 ++-- tensorflow/tools/docs/parser_test.py | 4 ---- tensorflow/tools/docs/pretty_docs.py | 12 ++++++------ tensorflow/workspace.bzl | 11 ----------- 9 files changed, 12 insertions(+), 48 deletions(-) diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index 003e0a25ec..8d55148e48 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -148,19 +148,7 @@ viewing. Do not include url parameters in the source code URL. Before building the documentation, you must first set up your environment by doing the following: -1. If pip isn't installed on your machine, install it now by issuing the -following command: - - $ sudo easy_install pip - -2. Use pip to install codegen, mock, and pandas by issuing the following - command (Note: If you are using - a [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage your - dependencies, you may not want to use sudo for these installations): - - $ sudo pip install codegen mock pandas - -3. If bazel is not installed on your machine, install it now. If you are on +1. If bazel is not installed on your machine, install it now. If you are on Linux, install bazel by issuing the following command: $ sudo apt-get install bazel # Linux @@ -168,10 +156,10 @@ following command: If you are on Mac OS, find bazel installation instructions on [this page](https://bazel.build/versions/master/docs/install.html#mac-os-x). -4. 
Change directory to the top-level `tensorflow` directory of the TensorFlow +2. Change directory to the top-level `tensorflow` directory of the TensorFlow source code. -5. Run the `configure` script and answer its prompts appropriately for your +3. Run the `configure` script and answer its prompts appropriately for your system. $ ./configure diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index 8f10bc9e0c..cafa1f7eb3 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -37,7 +37,7 @@ py_library( srcs = ["parser.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], - deps = ["@com_github_andreif_codegen"], + deps = ["@astor_archive//:astor"], ) py_test( diff --git a/tensorflow/tools/docs/build_docs_test.py b/tensorflow/tools/docs/build_docs_test.py index ae293f6576..2e8f634e7c 100644 --- a/tensorflow/tools/docs/build_docs_test.py +++ b/tensorflow/tools/docs/build_docs_test.py @@ -39,10 +39,6 @@ class Flags(object): class BuildDocsTest(googletest.TestCase): def testBuildDocs(self): - if sys.version_info >= (3, 0): - print('Warning: Doc generation is not supported from python3.') - return - doc_generator = generate_lib.DocGenerator() doc_generator.set_py_modules([('tf', tf), ('tfdbg', tf_debug)]) diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index 003f972070..635408d87f 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -455,8 +455,6 @@ class DocGenerator(object): """Main entry point for generating docs.""" def __init__(self): - if sys.version_info >= (3, 0): - sys.exit('Doc generation is not supported from python3.') self.argument_parser = argparse.ArgumentParser() self._py_modules = None self._private_map = _get_default_private_map() diff --git a/tensorflow/tools/docs/generate_lib_test.py b/tensorflow/tools/docs/generate_lib_test.py index 1ceaf31f1c..ea6d28a02b 100644 --- 
a/tensorflow/tools/docs/generate_lib_test.py +++ b/tensorflow/tools/docs/generate_lib_test.py @@ -52,9 +52,6 @@ class DummyVisitor(object): class GenerateTest(googletest.TestCase): def test_write(self): - if sys.version_info >= (3, 0): - self.skipTest('Warning: Doc generation is not supported from python3.') - module = sys.modules[__name__] index = { diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 3db164c2b5..1798378d55 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -26,7 +26,7 @@ import os import re import sys -import codegen +import astor import six from google.protobuf.message import Message as ProtoMessage @@ -705,7 +705,7 @@ def _generate_signature(func, reverse_index): if id(default) in reverse_index: default_text = reverse_index[id(default)] elif ast_default is not None: - default_text = codegen.to_source(ast_default) + default_text = astor.to_source(ast_default) if default_text != repr(default): # This may be an internal name. If so, handle the ones we know about. # TODO(wicke): This should be replaced with a lookup in the index. 
diff --git a/tensorflow/tools/docs/parser_test.py b/tensorflow/tools/docs/parser_test.py index 8a0e9af521..7d2bf9177a 100644 --- a/tensorflow/tools/docs/parser_test.py +++ b/tensorflow/tools/docs/parser_test.py @@ -523,10 +523,6 @@ class TestParseFunctionDetails(googletest.TestCase): class TestGenerateSignature(googletest.TestCase): def test_known_object(self): - if sys.version_info >= (3, 0): - print('Warning: Doc generation is not supported from python3.') - return - known_object = object() reverse_index = {id(known_object): 'location.of.object.in.api'} diff --git a/tensorflow/tools/docs/pretty_docs.py b/tensorflow/tools/docs/pretty_docs.py index 543b5fa6fe..55ab5bdd49 100644 --- a/tensorflow/tools/docs/pretty_docs.py +++ b/tensorflow/tools/docs/pretty_docs.py @@ -101,7 +101,7 @@ def _build_class_page(page_info): link_template = '[`{short_name}`]({url})' parts.append(', '.join( - link_template.format(**base.__dict__) for base in page_info.bases)) + link_template.format(**base._asdict()) for base in page_info.bases)) parts.append('\n\n') @@ -159,7 +159,7 @@ def _build_class_page(page_info): h3 = ('

' '{short_name}' '

\n\n') - parts.append(h3.format(**method_info.__dict__)) + parts.append(h3.format(**method_info._asdict())) if method_info.signature is not None: parts.append(_build_signature(method_info, use_full_name=False)) @@ -217,7 +217,7 @@ def _build_module_page(page_info): template = '[`{short_name}`]({url}) module' for item in page_info.modules: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -229,7 +229,7 @@ def _build_module_page(page_info): template = '[`class {short_name}`]({url})' for item in page_info.classes: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -241,7 +241,7 @@ def _build_module_page(page_info): template = '[`{short_name}(...)`]({url})' for item in page_info.functions: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -254,7 +254,7 @@ def _build_module_page(page_info): parts.append('## Other Members\n\n') for item in page_info.other_members: - parts.append('`{short_name}`\n\n'.format(**item.__dict__)) + parts.append('`{short_name}`\n\n'.format(**item._asdict())) return ''.join(parts) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index eca744a920..4a2274eb1a 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -328,17 +328,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = str(Label("//third_party:backports_weakref.BUILD")), ) - tf_http_archive( - name = "com_github_andreif_codegen", - urls = [ - "https://mirror.bazel.build/github.com/andreif/codegen/archive/1.0.tar.gz", - "https://github.com/andreif/codegen/archive/1.0.tar.gz", - ], - sha256 = "2dadd04a2802de27e0fe5a19b76538f6da9d39ff244036afa00c1bba754de5ee", - strip_prefix = "codegen-1.0", - build_file = 
str(Label("//third_party:codegen.BUILD")), - ) - filegroup_external( name = "org_python_license", licenses = ["notice"], # Python 2.0 -- GitLab From 4f5d9a88f84e2261808bc986ece951e6e1d10725 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Tue, 6 Feb 2018 17:55:15 -0800 Subject: [PATCH 0007/1931] remove unused codegen.BUILD --- third_party/codegen.BUILD | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 third_party/codegen.BUILD diff --git a/third_party/codegen.BUILD b/third_party/codegen.BUILD deleted file mode 100644 index df436c8163..0000000000 --- a/third_party/codegen.BUILD +++ /dev/null @@ -1,16 +0,0 @@ -# -*- mode: python; -*- -# -# Description: -# Extension to ast that allow ast -> python code generation. - -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) # New BSD - -exports_files(["LICENSE"]) - -py_library( - name = "com_github_andreif_codegen", - srcs = glob(["codegen.py"]), - srcs_version = "PY2AND3", -) -- GitLab From 736e8c4ccb16718d11cf7c8e1fac843bf6e388a7 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Wed, 14 Feb 2018 18:26:20 +0900 Subject: [PATCH 0008/1931] fix typo --- tensorflow/core/lib/io/record_writer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/lib/io/record_writer.cc b/tensorflow/core/lib/io/record_writer.cc index 3657243c5d..ebc5648269 100644 --- a/tensorflow/core/lib/io/record_writer.cc +++ b/tensorflow/core/lib/io/record_writer.cc @@ -49,7 +49,7 @@ RecordWriterOptions RecordWriterOptions::CreateRecordWriterOptions( #endif // IS_SLIM_BUILD } else if (compression_type != compression::kNone) { LOG(ERROR) << "Unsupported compression_type:" << compression_type - << ". No comprression will be used."; + << ". 
No compression will be used."; } return options; } -- GitLab From 617fa4e5fa634270c36a2a8762e6ce96bd38f2f8 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Wed, 14 Feb 2018 18:35:31 +0900 Subject: [PATCH 0009/1931] fix typo --- tensorflow/contrib/makefile/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index b0228c5435..995230dfa8 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -155,7 +155,7 @@ CC_PREFIX=ccache tensorflow/contrib/makefile/build_all_android.sh -s tensorflow/ (add -T on subsequent builds to skip protobuf downloading/building) -#### Testing the the CUDA-enabled benchmark via adb: +#### Testing the CUDA-enabled benchmark via adb: Build binaries first as above, then run: ```bash -- GitLab From 15f3b920ad7eb7fcca3afee14d16049db2046d4b Mon Sep 17 00:00:00 2001 From: Nathan Luehr Date: Wed, 14 Feb 2018 16:27:23 -0800 Subject: [PATCH 0010/1931] Fix __shared__ types with non-empty constructor std::complex has a non-empty constructor (zero assignment) that is not compatible with CUDA __shared__ memory. This fixes current reliance on undefined behavior. (and removes an unnecessary run-time initialization). 
--- .../core/kernels/reduction_gpu_kernels.cu.h | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index 15ae4c1fc5..95a3e222b5 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -244,6 +244,33 @@ __global__ void RowReduceKernel( if (row < num_rows && lane == 0) out[row] = sum; } +template +struct storage_type { + T1 val; + __host__ __device__ storage_type() {} + __host__ __device__ operator T1() { return val; } + __host__ __device__ storage_type& operator=(const T1& in) { + val = in; + return *this; + } +}; + +template +struct storage_type> { + T2 real; + T2 imag; + __host__ __device__ storage_type() {} + __host__ __device__ operator std::complex() { + return std::complex(real, imag); + } + __host__ __device__ storage_type>& operator=( + const std::complex& in) { + real = in.real(); + imag = in.imag(); + return *this; + } +}; + // Works only if there are <= 16 columns // each warps sums over multiple rows at once template @@ -268,7 +295,7 @@ __global__ void ColumnReduceMax16ColumnsKernel( // 1D array necessary due to bug in CUDA 9 compiler. // TODO(nluehr) revert to 2D array when compiler is ready. - __shared__ value_type partial_sums[32 * 33]; + __shared__ storage_type partial_sums[32 * 33]; row += rows_per_warp * gridDim.y * blockDim.y; for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) { @@ -294,7 +321,8 @@ __global__ void ColumnReduceMax16ColumnsKernel( if (blockDim.y > 1) { for (int row = 1; row < blockDim.y; ++row) { - s = op(s, partial_sums[threadIdx.x * 33 + row]); + value_type t = partial_sums[threadIdx.x * 33 + row]; + s = op(s, t); } } @@ -316,7 +344,7 @@ __global__ void ColumnReduceKernel( // 1D array necessary due to bug in CUDA 9 compiler. // TODO(nluehr) revert to 2D array when compiler is ready. 
- __shared__ value_type partial_sums[32 * 33]; + __shared__ storage_type partial_sums[32 * 33]; row += gridDim.y * blockDim.y; @@ -347,7 +375,8 @@ __global__ void ColumnReduceKernel( min(blockDim.y, num_rows - blockIdx.y * blockDim.y); for (int row = 1; row < numRowsThisBlock; ++row) { - s = op(s, partial_sums[threadIdx.x * 33 + row]); + value_type t = partial_sums[threadIdx.x * 33 + row]; + s = op(s, t); } out[col * gridDim.y + blockIdx.y] = s; -- GitLab From b81aaac898d93e17b4a280bb02547d2a60d490cb Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 15 Feb 2018 08:28:12 +0000 Subject: [PATCH 0011/1931] Fix warnings in tf.contrib.bayesflow.monte_carlo.expectation This fix fixes several warnings in tf.contrib.bayesflow.monte_carlo.expectation by switching to keepdims for tf.reduce_mean. Signed-off-by: Yong Tang --- tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py index 985177e897..5263e87ae6 100644 --- a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py @@ -328,7 +328,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, if not callable(f): raise ValueError('`f` must be a callable function.') if use_reparametrization: - return math_ops.reduce_mean(f(samples), axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(f(samples), axis=axis, keepdims=keep_dims) else: if not callable(log_prob): raise ValueError('`log_prob` must be a callable function.') @@ -348,7 +348,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, # "Is there a floating point value of x, for which x-x == 0 is false?" # http://stackoverflow.com/q/2686644 fx += stop(fx) * (logpx - stop(logpx)) # Add zeros_like(logpx). 
- return math_ops.reduce_mean(fx, axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(fx, axis=axis, keepdims=keep_dims) def _sample_mean(values): -- GitLab From 9c272adf248228408448db6219b238145f5a02ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 16 Feb 2018 10:38:50 +0800 Subject: [PATCH 0012/1931] DOC: move doc to api def file --- .../core/api_def/base_api/api_def_ScatterNd.pbtxt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt index 4cb8c064fc..4e95895f54 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNd.pbtxt @@ -25,12 +25,12 @@ A new tensor with the given shape and updates applied according to the indices. END } - summary: "Scatter `updates` into a new (initially zero) tensor according to `indices`." + summary: "Scatter `updates` into a new empty tensor according to `indices`." description: < Date: Mon, 19 Feb 2018 12:56:40 +0400 Subject: [PATCH 0013/1931] Add broadcasting functionality fro Div and Sub ops. 
--- tensorflow/contrib/lite/kernels/div.cc | 117 ++++++-- tensorflow/contrib/lite/kernels/div_test.cc | 174 ++++++++++++ .../internal/optimized/optimized_ops.h | 268 +++++++++++++++++- .../internal/reference/reference_ops.h | 257 +++++++++++++++++ tensorflow/contrib/lite/kernels/sub.cc | 135 +++++++-- tensorflow/contrib/lite/kernels/sub_test.cc | 213 ++++++++++++++ .../testing/generated_examples_zip_test.cc | 15 +- 7 files changed, 1122 insertions(+), 57 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/div_test.cc create mode 100644 tensorflow/contrib/lite/kernels/sub_test.cc diff --git a/tensorflow/contrib/lite/kernels/div.cc b/tensorflow/contrib/lite/kernels/div.cc index 44bd0dc85d..c77a0de9b7 100644 --- a/tensorflow/contrib/lite/kernels/div.cc +++ b/tensorflow/contrib/lite/kernels/div.cc @@ -37,7 +37,23 @@ constexpr int kInputTensor1 = 0; constexpr int kInputTensor2 = 1; constexpr int kOutputTensor = 0; +struct OpData { + bool requires_broadcast; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new OpData; + data->requires_broadcast = false; + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); @@ -45,35 +61,85 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2)); - for (int i = 0; i < NumDimensions(input1); ++i) { - TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i), - SizeOfDimension(input2, i)); - } + TF_LITE_ENSURE_EQ(context, input1->type, input2->type); + output->type = input2->type; + + data->requires_broadcast = 
!HaveSameShapes(input1, input2); - TF_LITE_ENSURE_EQ(context, input1->type, output->type); - TF_LITE_ENSURE_EQ(context, input2->type, output->type); + TfLiteIntArray* output_size = nullptr; + if (data->requires_broadcast) { + TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast( + context, input1, input2, &output_size)); + } else { + output_size = TfLiteIntArrayCopy(input1->dims); + } - TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims); return context->ResizeTensor(context, output, output_size); } template -void EvalDivFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteDivParams* params, TfLiteTensor* input1, - TfLiteTensor* input2, TfLiteTensor* output) { +void EvalFloat(TfLiteContext* context, TfLiteNode* node, + TfLiteDivParams* params, const OpData* data, + TfLiteTensor* input1, TfLiteTensor* input2, + TfLiteTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRangeFloat(params->activation, &output_activation_min, &output_activation_max); -#define TF_LITE_DIV(type) \ - type::Div(GetTensorData(input1), GetTensorDims(input1), \ - GetTensorData(input2), GetTensorDims(input2), \ - output_activation_min, output_activation_max, \ - GetTensorData(output), GetTensorDims(output)) +#define TF_LITE_DIV(type, opname) \ + type::opname(GetTensorData(input1), GetTensorDims(input1), \ + GetTensorData(input2), GetTensorDims(input2), \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + if (data->requires_broadcast) { + TF_LITE_DIV(reference_ops, BroadcastDiv); + } else { + TF_LITE_DIV(reference_ops, Div); + } + } else { + if (data->requires_broadcast) { + TF_LITE_DIV(optimized_ops, BroadcastDiv); + } else { + TF_LITE_DIV(optimized_ops, Div); + } + } +#undef TF_LITE_DIV +} + +template +void EvalQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteDivParams* params, const OpData* data, + TfLiteTensor* input1, TfLiteTensor* input2, + 
TfLiteTensor* output) { + auto input1_offset = -input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + auto output_offset = output->params.zero_point; + + int32_t output_multiplier; + int output_shift; + + double real_multiplier = + input1->params.scale * input2->params.scale / output->params.scale; + QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, + &output_shift); + + int32 output_activation_min, output_activation_max; + CalculateActivationRangeUint8(params->activation, output, + &output_activation_min, &output_activation_max); + +#define TF_LITE_DIV(type, opname) \ + type::opname(GetTensorData(input1), GetTensorDims(input1), \ + input1_offset, GetTensorData(input2), \ + GetTensorDims(input2), input2_offset, output_offset, \ + output_multiplier, output_shift, output_activation_min, \ + output_activation_max, GetTensorData(output), \ + GetTensorDims(output)); + // The quantized version of Div doesn't support activations, so we + // always use BroadcastDiv. 
if (kernel_type == kReference) { - TF_LITE_DIV(reference_ops); + TF_LITE_DIV(reference_ops, BroadcastDiv); } else { - TF_LITE_DIV(optimized_ops); + TF_LITE_DIV(optimized_ops, BroadcastDiv); } #undef TF_LITE_DIV } @@ -81,15 +147,20 @@ void EvalDivFloat(TfLiteContext* context, TfLiteNode* node, template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); if (output->type == kTfLiteFloat32) { - EvalDivFloat(context, node, params, input1, input2, output); + EvalFloat(context, node, params, data, input1, input2, output); + } else if (output->type == kTfLiteUInt8) { + EvalQuantized(context, node, params, data, input1, input2, + output); } else { - context->ReportError(context, "Inputs and outputs not all float types."); + context->ReportError(context, + "Div only supports FLOAT32 and quantized UINT8 now."); return kTfLiteError; } @@ -99,19 +170,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace div TfLiteRegistration* Register_DIV_REF() { - static TfLiteRegistration r = {nullptr, nullptr, div::Prepare, + static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, div::Eval}; return &r; } TfLiteRegistration* Register_DIV_GENERIC_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, div::Prepare, + static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, div::Eval}; return &r; } TfLiteRegistration* Register_DIV_NEON_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, div::Prepare, + static TfLiteRegistration r = {div::Init, div::Free, div::Prepare, div::Eval}; return &r; } diff --git a/tensorflow/contrib/lite/kernels/div_test.cc b/tensorflow/contrib/lite/kernels/div_test.cc new file mode 100644 index 
0000000000..78918a0d79 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/div_test.cc @@ -0,0 +1,174 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseDivOpModel : public SingleOpModel { + public: + BaseDivOpModel(const TensorData& input1, const TensorData& input2, + const TensorData& output, + ActivationFunctionType activation_type) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_DIV, BuiltinOptions_DivOptions, + CreateDivOptions(builder_, activation_type).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + protected: + int input1_; + int input2_; + int output_; +}; + +class FloatDivOpModel : public BaseDivOpModel { + public: + using BaseDivOpModel::BaseDivOpModel; + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +// For quantized Div, the error shouldn't exceed (2*step + step^2). +// The param min=-1.0 & max=1.0 is used in the following tests. 
+// The tolerance value is ~0.0157. +const float kQuantizedStep = 2.0 / 255.0; +const float kQuantizedTolerance = + 2.0 * kQuantizedStep + kQuantizedStep * kQuantizedStep; + +class QuantizedDivOpModel : public BaseDivOpModel { + public: + using BaseDivOpModel::BaseDivOpModel; + + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +TEST(FloatDivOpTest, NoActivation) { + FloatDivOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-0.2, 0.2, -1.2, 0.8}); + m.PopulateTensor(m.input2(), {0.5, 0.2, -1.5, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-0.4, 1.0, 0.8, 1.6}))); +} + +TEST(FloatDivOpTest, ActivationRELU_N1_TO_1) { + FloatDivOpModel m( + {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU_N1_TO_1); + m.PopulateTensor(m.input1(), {-0.2, 0.2, -1.2, 0.8}); + m.PopulateTensor(m.input2(), {0.1, 0.2, -1.5, 0.5}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-1.0, 1.0, 0.8, 1.0}))); +} + +TEST(FloatDivOpTest, VariousInputShapes) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 0.3, 0.8, 1.1, -2.0}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.6, 0.5, -1.1, -0.1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-20.0, 1.0, 0.5, 1.6, -1.0, 20.0}))) + << "With shape number " << i; + } +} + +TEST(FloatDivOpTest, WithBroadcast) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < 
test_shapes.size(); ++i) { + FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, // always a scalar + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123}); + m.PopulateTensor(m.input2(), {0.1}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-2.0, 2.0, 0.7, 0.8, 1.1, -1.23}))) + << "With shape number " << i; + } +} + +TEST(QuantizedDivOpTest, NoActivation) { + QuantizedDivOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-0.6, 0.2, 0.9, -0.7}); + m.QuantizeAndPopulate(m.input2(), {0.8, 0.4, 0.9, -0.8}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({-0.75, 0.5, 1.0, 0.875}, + kQuantizedTolerance))); +} + +// for quantized Div, the error shouldn't exceed 2*step +float GetTolerance(int min, int max) { + float kQuantizedStep = (max - min) / 255.0; + float kQuantizedTolerance = 2.0 * kQuantizedStep; + return kQuantizedTolerance; +} + +TEST(QuantizedDivOpTest, WithBroadcast) { + float kQuantizedTolerance = GetTolerance(-3.0, 3.0); + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + QuantizedDivOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, // always a scalar + {TensorType_UINT8, {}, -3.0, 3.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123}); + m.QuantizeAndPopulate(m.input2(), {0.1}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + {-2.0, 2.0, 0.7, 0.8, 1.1, -1.23}, kQuantizedTolerance))) + << "With shape number " << i; + } +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + 
::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index dec58fea4f..d12a3eca1d 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1928,6 +1928,126 @@ inline void Div(const float* input1_data, const Dims<4>& input1_dims, } } +// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO(benoitjacob): BroadcastDiv is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. 
+ // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] / + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + T output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + BroadcastDiv(input1_data, input1_dims, input2_data, input2_dims, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). 
+ // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 unclamped_result = + output_offset + + MultiplyByQuantizedMultiplierSmallerThanOne( + input1_val / input2_val, output_multiplier, output_shift); + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, unclamped_result)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + BroadcastDiv(input1_data, input1_dims, input1_offset, input2_data, + input2_dims, input2_offset, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_data, output_dims); +} + // TODO(aselle): This is not actually optimized yet. 
inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, @@ -1955,6 +2075,152 @@ inline void Sub(const float* input1_data, const Dims<4>& input1_dims, } } } + +// TODO(jiawen): We can implement BroadcastSub on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +// TODO(benoitjacob): BroadcastSub is intentionally duplicated from +// reference_ops.h. Once an optimized version is implemented and NdArrayDesc +// is no longer referenced in this file, move NdArrayDesc from types.h to +// reference_ops.h. +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
+ for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] - + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + T output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + BroadcastSub(input1_data, input1_dims, input2_data, input2_dims, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. 
The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val - scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +template +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == 
FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + BroadcastSub(left_shift, input1_data, input1_dims, input1_offset, + input1_multiplier, input1_shift, input2_data, input2_dims, + input2_offset, input2_multiplier, input2_shift, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims); +} + template void Concatenation(int concat_dim, const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, @@ -2866,7 +3132,7 @@ inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); +gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); const int height = MatchingArraySize(input_dims, 2, output_dims, 2); const int width = MatchingArraySize(input_dims, 1, output_dims, 1); diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 5f4d5be323..c7b7687622 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1208,6 +1208,122 @@ inline void Div(const float* input1_data, const Dims<4>& input1_dims, } } +// TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary +// dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. 
The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. 
+ for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] / + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + T output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + BroadcastDiv(input1_data, input1_dims, input2_data, input2_dims, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastDiv/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest + // stride, typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. 
+ // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for + // the best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 unclamped_result = + output_offset + + MultiplyByQuantizedMultiplierSmallerThanOne( + input1_val / input2_val, output_multiplier, output_shift); + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, unclamped_result)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, + int32 input1_offset, const uint8* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + BroadcastDiv(input1_data, input1_dims, input1_offset, input2_data, + input2_dims, input2_offset, output_offset, output_multiplier, + output_shift, output_activation_min, output_activation_max, + output_data, output_dims); +} + inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, float output_activation_min, float output_activation_max, @@ -1235,6 +1351,147 @@ inline void Sub(const float* input1_data, const Dims<4>& input1_dims, } } +// TODO(jiawen): We can implement BroadcastSub on buffers of arbitrary +// 
dimensionality if the runtime code does a single loop over one dimension +// that handles broadcasting as the base case. The code generator would then +// generate max(D1, D2) nested for loops. +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. 
+ for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + output_data[Offset(output_dims, c, x, y, b)] = + ActivationFunctionWithMinMax( + input1_data[SubscriptToIndex(desc1, c, x, y, b)] - + input2_data[SubscriptToIndex(desc2, c, x, y, b)], + output_activation_min, output_activation_max); + } + } + } + } +} + +// legacy, for compatibility with old checked-in code +template +void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T* output_data, const Dims<4>& output_dims) { + T output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + + BroadcastSub(input1_data, input1_dims, input2_data, input2_dims, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("BroadcastSub/8bit"); + + NdArrayDesc<4> desc1; + NdArrayDesc<4> desc2; + NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); + + // In Tensorflow, the dimensions are canonically named (batch_number, row, + // col, channel), with extents (batches, height, width, depth), with the + // trailing dimension changing most rapidly (channels has the smallest stride, + // typically 1 element). + // + // In generated C code, we store arrays with the dimensions reversed. 
The + // first dimension has smallest stride. + // + // We name our variables by their Tensorflow convention, but generate C code + // nesting loops such that the innermost loop has the smallest stride for the + // best cache behavior. + for (int b = 0; b < ArraySize(output_dims, 3); ++b) { + for (int y = 0; y < ArraySize(output_dims, 2); ++y) { + for (int x = 0; x < ArraySize(output_dims, 1); ++x) { + for (int c = 0; c < ArraySize(output_dims, 0); ++c) { + const int32 input1_val = + input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; + const int32 input2_val = + input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; + const int32 shifted_input1_val = input1_val * (1 << left_shift); + const int32 shifted_input2_val = input2_val * (1 << left_shift); + const int32 scaled_input1_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input1_val, input1_multiplier, input1_shift); + const int32 scaled_input2_val = + MultiplyByQuantizedMultiplierSmallerThanOne( + shifted_input2_val, input2_multiplier, input2_shift); + const int32 raw_sum = scaled_input1_val - scaled_input2_val; + const int32 raw_output = + MultiplyByQuantizedMultiplierSmallerThanOne( + raw_sum, output_multiplier, output_shift) + + output_offset; + const int32 clamped_output = + std::min(output_activation_max, + std::max(output_activation_min, raw_output)); + output_data[Offset(output_dims, c, x, y, b)] = + static_cast(clamped_output); + } + } + } + } +} + +template +inline void BroadcastSub(int left_shift, const uint8* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const uint8* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + static_assert(Ac == 
FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + BroadcastSub(left_shift, input1_data, input1_dims, input1_offset, + input1_multiplier, input1_shift, input2_data, input2_dims, + input2_offset, input2_multiplier, input2_shift, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims); +} + template void Concatenation(int concat_dim, const Scalar* const* input_data, const Dims<4>* const* input_dims, int inputs_count, diff --git a/tensorflow/contrib/lite/kernels/sub.cc b/tensorflow/contrib/lite/kernels/sub.cc index ddaf498d5b..410585a293 100644 --- a/tensorflow/contrib/lite/kernels/sub.cc +++ b/tensorflow/contrib/lite/kernels/sub.cc @@ -26,7 +26,7 @@ namespace ops { namespace builtin { namespace sub { -// This file has three implementation of Div. +// This file has three implementation of Sub. 
enum KernelType { kReference, kGenericOptimized, // Neon-free @@ -37,7 +37,23 @@ constexpr int kInputTensor1 = 0; constexpr int kInputTensor2 = 1; constexpr int kOutputTensor = 0; +struct OpData { + bool requires_broadcast; +}; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new OpData; + data->requires_broadcast = false; + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); @@ -45,51 +61,122 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TF_LITE_ENSURE_EQ(context, NumDimensions(input1), NumDimensions(input2)); - for (int i = 0; i < NumDimensions(input1); ++i) { - TF_LITE_ENSURE_EQ(context, SizeOfDimension(input1, i), - SizeOfDimension(input2, i)); - } + TF_LITE_ENSURE_EQ(context, input1->type, input2->type); + output->type = input2->type; - TF_LITE_ENSURE_EQ(context, input1->type, output->type); - TF_LITE_ENSURE_EQ(context, input2->type, output->type); + data->requires_broadcast = !HaveSameShapes(input1, input2); + + TfLiteIntArray* output_size = nullptr; + if (data->requires_broadcast) { + TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast( + context, input1, input2, &output_size)); + } else { + output_size = TfLiteIntArrayCopy(input1->dims); + } - TfLiteIntArray* output_size = TfLiteIntArrayCopy(input1->dims); return context->ResizeTensor(context, output, output_size); } template void EvalSubFloat(TfLiteContext* context, TfLiteNode* node, - TfLiteSubParams* params, TfLiteTensor* input1, - TfLiteTensor* input2, TfLiteTensor* output) { + TfLiteSubParams* params, const OpData* data, + TfLiteTensor* input1, 
TfLiteTensor* input2, + TfLiteTensor* output) { float output_activation_min, output_activation_max; CalculateActivationRangeFloat(params->activation, &output_activation_min, &output_activation_max); -#define TF_LITE_Sub(type) \ - type::Sub(GetTensorData(input1), GetTensorDims(input1), \ - GetTensorData(input2), GetTensorDims(input2), \ - output_activation_min, output_activation_max, \ - GetTensorData(output), GetTensorDims(output)) +#define TF_LITE_SUB(type, opname) \ + type::opname(GetTensorData(input1), GetTensorDims(input1), \ + GetTensorData(input2), GetTensorDims(input2), \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)) + if (kernel_type == kReference) { + if (data->requires_broadcast) { + TF_LITE_SUB(reference_ops, BroadcastSub); + } else { + TF_LITE_SUB(reference_ops, Sub); + } + } else { + if (data->requires_broadcast) { + TF_LITE_SUB(optimized_ops, BroadcastSub); + } else { + TF_LITE_SUB(optimized_ops, Sub); + } + } +#undef TF_LITE_SUB +} + +template +void EvalSubQuantized(TfLiteContext* context, TfLiteNode* node, + TfLiteSubParams* params, const OpData* data, + TfLiteTensor* input1, TfLiteTensor* input2, + TfLiteTensor* output) { + auto input1_offset = -input1->params.zero_point; + auto input2_offset = -input2->params.zero_point; + auto output_offset = output->params.zero_point; + const int left_shift = 20; + const double twice_max_input_scale = + 2 * std::max(input1->params.scale, input2->params.scale); + const double real_input1_multiplier = + input1->params.scale / twice_max_input_scale; + const double real_input2_multiplier = + input2->params.scale / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / ((1 << left_shift) * output->params.scale); + + int32 input1_multiplier; + int input1_shift; + QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier, + &input1_shift); + int32 input2_multiplier; + int input2_shift; + 
QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier, + &input2_shift); + int32 output_multiplier; + int output_shift; + QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier, + &output_shift); + + int32 output_activation_min, output_activation_max; + CalculateActivationRangeUint8(params->activation, output, + &output_activation_min, &output_activation_max); + +#define TF_LITE_SUB(type, opname) \ + type::opname(left_shift, GetTensorData(input1), \ + GetTensorDims(input1), input1_offset, input1_multiplier, \ + input1_shift, GetTensorData(input2), \ + GetTensorDims(input2), input2_offset, input2_multiplier, \ + input2_shift, output_offset, output_multiplier, output_shift, \ + output_activation_min, output_activation_max, \ + GetTensorData(output), GetTensorDims(output)); + // The quantized version of Sub doesn't support activations, so we + // always use BroadcastSub. if (kernel_type == kReference) { - TF_LITE_Sub(reference_ops); + TF_LITE_SUB(reference_ops, BroadcastSub); } else { - TF_LITE_Sub(optimized_ops); + TF_LITE_SUB(optimized_ops, BroadcastSub); } -#undef TF_LITE_Sub +#undef TF_LITE_SUB } template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); + OpData* data = reinterpret_cast(node->user_data); TfLiteTensor* input1 = GetInput(context, node, kInputTensor1); TfLiteTensor* input2 = GetInput(context, node, kInputTensor2); TfLiteTensor* output = GetOutput(context, node, kOutputTensor); if (output->type == kTfLiteFloat32) { - EvalSubFloat(context, node, params, input1, input2, output); + EvalSubFloat(context, node, params, data, input1, input2, + output); + } else if (output->type == kTfLiteUInt8) { + EvalSubQuantized(context, node, params, data, input1, input2, + output); } else { - context->ReportError(context, "Inputs and outputs not all float types."); + context->ReportError(context, + "Inputs and outputs not all float|unit8 types."); return 
kTfLiteError; } @@ -99,19 +186,19 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } // namespace sub TfLiteRegistration* Register_SUB_REF() { - static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare, + static TfLiteRegistration r = {sub::Init, sub::Free, sub::Prepare, sub::Eval}; return &r; } TfLiteRegistration* Register_SUB_GENERIC_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare, + static TfLiteRegistration r = {sub::Init, sub::Free, sub::Prepare, sub::Eval}; return &r; } TfLiteRegistration* Register_SUB_NEON_OPT() { - static TfLiteRegistration r = {nullptr, nullptr, sub::Prepare, + static TfLiteRegistration r = {sub::Init, sub::Free, sub::Prepare, sub::Eval}; return &r; } diff --git a/tensorflow/contrib/lite/kernels/sub_test.cc b/tensorflow/contrib/lite/kernels/sub_test.cc new file mode 100644 index 0000000000..b2c6d05f62 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/sub_test.cc @@ -0,0 +1,213 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class BaseSubOpModel : public SingleOpModel { + public: + BaseSubOpModel(const TensorData& input1, const TensorData& input2, + const TensorData& output, + ActivationFunctionType activation_type) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_Sub, BuiltinOptions_SubOptions, + CreateSubOptions(builder_, activation_type).Union()); + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + + protected: + int input1_; + int input2_; + int output_; +}; + +class FloatSubOpModel : public BaseSubOpModel { + public: + using BaseSubOpModel::BaseSubOpModel; + + std::vector GetOutput() { return ExtractVector(output_); } +}; + +class QuantizedSubOpModel : public BaseSubOpModel { + public: + using BaseSubOpModel::BaseSubOpModel; + + std::vector GetDequantizedOutput() { + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); + } +}; + +// for quantized Sub, the error shouldn't exceed 2*step +float GetTolerance(int min, int max) { + float kQuantizedStep = (max - min) / 255.0; + float kQuantizedTolerance = 2.0 * kQuantizedStep; + return kQuantizedTolerance; +} + +TEST(FloatSubOpModel, NoActivation) { + FloatSubOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-2.1, 0.0, 1.4, 
-0.3})); +} + +TEST(FloatSubOpModel, ActivationRELU_N1_TO_1) { + FloatSubOpModel m( + {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU_N1_TO_1); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.8}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.0, 0.0, 1.0, -0.3})); +} + +TEST(FloatSubOpModel, VariousInputShapes) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatSubOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5, -1.1, 2.0}); + m.PopulateTensor(m.input2(), {0.1, 0.2, 0.3, 0.8, -1.1, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({-2.1, 0.0, 1.4, -0.3, 0.0, 1.9})) + << "With shape number " << i; + } +} + +TEST(FloatSubOpModel, WithBroadcast) { + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + FloatSubOpModel m({TensorType_FLOAT32, test_shapes[i]}, + {TensorType_FLOAT32, {}}, // always a scalar + {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE); + m.PopulateTensor(m.input1(), {-2.0, 0.2, 1.7, 0.5, -1.1, 2.0}); + m.PopulateTensor(m.input2(), {0.5}); + m.Invoke(); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear({-2.5, -0.3, 1.2, 0.0, -1.6, 1.5}))) + << "With shape number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedTestsNoActivation) { + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::vector> inputs1 = { + {0.1, 0.2, 0.3, 0.4}, {-0.2, 0.2, 0.4, 0.7}, {-0.01, 0.2, 0.7, 0.3}}; + std::vector> inputs2 = { + {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.2}, {0.6, 0.4, -0.18, 0.5}}; + std::vector> results = { + {-0.5, -0.2, 0.0, 0.3}, {-0.8, -0.2, -0.1, 0.9}, {-0.61, 
-0.2, 0.88, -0.2}}; + for (int i = 0; i < inputs1.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + results[i], kQuantizedTolerance))) + << "With test number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedTestsActivationRELU_N1_TO_1) { + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::vector> inputs1 = {{-0.8, 0.2, 0.9, 0.7}, + {-0.8, 0.2, 0.7, 0.5}}; + std::vector> inputs2 = {{0.6, 0.4, 0.9, -0.8}, + {0.6, 0.4, -0.8, 0.3}}; + std::vector> results = {{-1.0, -0.2, 0.0, 1.0}, + {-1.0, -0.2, 1.0, 0.2}}; + for (int i = 0; i < inputs1.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}, + ActivationFunctionType_RELU_N1_TO_1); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( + results[i], kQuantizedTolerance))) + << "With test number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedVariousInputShapes) { + float kQuantizedTolerance = GetTolerance(-3.0, 3.0); + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.QuantizeAndPopulate(m.input2(), {0.1, 0.3, 0.3, 0.5, 1.1, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + 
ElementsAreArray(ArrayFloatNear({-2.1, -0.1, 0.4, 0.3, 0.0, 1.9}, + kQuantizedTolerance))) + << "With shape number " << i; + } +} + +TEST(QuantizedSubOpModel, QuantizedWithBroadcast) { + float kQuantizedTolerance = GetTolerance(-3.0, 3.0); + std::vector> test_shapes = { + {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; + for (int i = 0; i < test_shapes.size(); ++i) { + QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + {TensorType_UINT8, {}, -3.0, 3.0}, + ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.QuantizeAndPopulate(m.input2(), {0.7}); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear({-2.7, -0.5, 0.0, 0.1, 0.4, 1.3}, + kQuantizedTolerance))) + << "With shape number " << i; + } +} + +} // namespace +} // namespace tflite +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 49766cedac..1e177d5f6e 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -47,9 +47,6 @@ tensorflow::Env* env = tensorflow::Env::Default(); // Key is a substring of the test name and value is a bug number. // TODO(ahentz): make sure we clean this list up frequently. std::map kBrokenTests = { - // Sub and Div don't support broadcasting. - {R"(^\/diva.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"}, - {R"(^\/suba.*input_shape_1=\[1,3,4,3\],input_shape_2=\[3\])", "68500195"}, // Add only supports float32. 
(and "constant" tests use Add) {R"(^\/adda.*int32)", "68808744"}, @@ -235,22 +232,23 @@ TEST_P(OpsTest, RunStuff) { INSTANTIATE_TESTS(add) INSTANTIATE_TESTS(avg_pool) -INSTANTIATE_TESTS(space_to_batch_nd) INSTANTIATE_TESTS(batch_to_space_nd) INSTANTIATE_TESTS(concat) INSTANTIATE_TESTS(constant) INSTANTIATE_TESTS(control_dep) INSTANTIATE_TESTS(conv) INSTANTIATE_TESTS(depthwiseconv) +INSTANTIATE_TESTS(div) INSTANTIATE_TESTS(exp) INSTANTIATE_TESTS(fully_connected) INSTANTIATE_TESTS(fused_batch_norm) INSTANTIATE_TESTS(gather) INSTANTIATE_TESTS(global_batch_norm) -INSTANTIATE_TESTS(l2norm) INSTANTIATE_TESTS(l2_pool) +INSTANTIATE_TESTS(l2norm) INSTANTIATE_TESTS(local_response_norm) INSTANTIATE_TESTS(max_pool) +INSTANTIATE_TESTS(mean) INSTANTIATE_TESTS(mul) INSTANTIATE_TESTS(pad) INSTANTIATE_TESTS(relu) @@ -260,14 +258,13 @@ INSTANTIATE_TESTS(reshape) INSTANTIATE_TESTS(resize_bilinear) INSTANTIATE_TESTS(sigmoid) INSTANTIATE_TESTS(softmax) +INSTANTIATE_TESTS(space_to_batch_nd) INSTANTIATE_TESTS(space_to_depth) -INSTANTIATE_TESTS(sub) INSTANTIATE_TESTS(split) -INSTANTIATE_TESTS(div) -INSTANTIATE_TESTS(transpose) -INSTANTIATE_TESTS(mean) INSTANTIATE_TESTS(squeeze) INSTANTIATE_TESTS(strided_slice) +INSTANTIATE_TESTS(sub) +INSTANTIATE_TESTS(transpose) } // namespace testing } // namespace tflite -- GitLab From 779d457008ab7ea2c11f4d73370099a1e56c0652 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Sun, 25 Feb 2018 21:39:52 +0900 Subject: [PATCH 0014/1931] fix typo --- .../python/kernel_tests/linalg/linear_operator_diag_test.py | 2 +- tensorflow/python/ops/linalg/linear_operator_diag.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py index 343d158498..8cb9f9e621 100644 --- a/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py +++ b/tensorflow/python/kernel_tests/linalg/linear_operator_diag_test.py @@ -129,7 
+129,7 @@ class LinearOperatorDiagTest( with self.test_session() as sess: x = random_ops.random_normal(shape=(2, 2, 3, 4)) - # This LinearOperatorDiag will be brodacast to (2, 2, 3, 3) during solve + # This LinearOperatorDiag will be broadcast to (2, 2, 3, 3) during solve # and matmul with 'x' as the argument. diag = random_ops.random_uniform(shape=(2, 1, 3)) operator = linalg.LinearOperatorDiag(diag, is_self_adjoint=True) diff --git a/tensorflow/python/ops/linalg/linear_operator_diag.py b/tensorflow/python/ops/linalg/linear_operator_diag.py index b3ec3d5b7c..e180e83026 100644 --- a/tensorflow/python/ops/linalg/linear_operator_diag.py +++ b/tensorflow/python/ops/linalg/linear_operator_diag.py @@ -67,7 +67,7 @@ class LinearOperatorDiag(linear_operator.LinearOperator): operator = LinearOperatorDiag(diag) # Create a shape [2, 1, 4, 2] vector. Note that this shape is compatible - # since the batch dimensions, [2, 1], are brodcast to + # since the batch dimensions, [2, 1], are broadcast to # operator.batch_shape = [2, 3]. y = tf.random_normal(shape=[2, 1, 4, 2]) x = operator.solve(y) -- GitLab From b569035378ef4a8595c64e5f398d74244cac376e Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Sun, 25 Feb 2018 21:44:12 +0900 Subject: [PATCH 0015/1931] fix typo --- tensorflow/contrib/slim/python/slim/data/parallel_reader.py | 2 +- tensorflow/python/ops/distributions/special_math.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py index ad5e985487..b3343aef47 100644 --- a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py +++ b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py @@ -221,7 +221,7 @@ def parallel_read(data_sources, the data will be cycled through indefinitely. num_readers: a integer, number of Readers to create. reader_kwargs: an optional dict, of kwargs for the reader. 
- shuffle: boolean, wether should shuffle the files and the records by using + shuffle: boolean, whether should shuffle the files and the records by using RandomShuffleQueue as common_queue. dtypes: A list of types. The length of dtypes must equal the number of elements in each record. If it is None it will default to diff --git a/tensorflow/python/ops/distributions/special_math.py b/tensorflow/python/ops/distributions/special_math.py index bed4cbb2c1..1d605c5dfc 100644 --- a/tensorflow/python/ops/distributions/special_math.py +++ b/tensorflow/python/ops/distributions/special_math.py @@ -213,7 +213,7 @@ def _ndtri(p): # Compute x for p <= exp(-2): x = z - log(z)/z - (1/z) P(1/z) / Q(1/z), # where z = sqrt(-2. * log(p)), and P/Q are chosen between two different - # arrays based on wether p < exp(-32). + # arrays based on whether p < exp(-32). z = math_ops.sqrt(-2. * math_ops.log(sanitized_mcp)) first_term = z - math_ops.log(z) / z second_term_small_p = (_create_polynomial(1. / z, p2) -- GitLab From f1f70ef5c268d6ce41bdab4867ed0f2e19d6f924 Mon Sep 17 00:00:00 2001 From: Hovhannes Harutyunyan Date: Mon, 26 Feb 2018 10:52:11 +0400 Subject: [PATCH 0016/1931] Remove code that was written for compatibility with old checked-in code. Update code to have 80 characters per line. 
--- tensorflow/contrib/lite/kernels/div_test.cc | 3 +- .../internal/optimized/optimized_ops.h | 41 ------------------- .../internal/reference/reference_ops.h | 41 ------------------- tensorflow/contrib/lite/kernels/sub_test.cc | 18 +++++--- 4 files changed, 15 insertions(+), 88 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/div_test.cc b/tensorflow/contrib/lite/kernels/div_test.cc index 78918a0d79..e67e0ec034 100644 --- a/tensorflow/contrib/lite/kernels/div_test.cc +++ b/tensorflow/contrib/lite/kernels/div_test.cc @@ -154,7 +154,8 @@ TEST(QuantizedDivOpTest, WithBroadcast) { {TensorType_UINT8, {}, -3.0, 3.0}, // always a scalar {TensorType_UINT8, {}, -3.0, 3.0}, ActivationFunctionType_NONE); - m.QuantizeAndPopulate(m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123}); + m.QuantizeAndPopulate(m.input1(), {-0.2, 0.2, 0.07, + 0.08, 0.11, -0.123}); m.QuantizeAndPopulate(m.input2(), {0.1}); m.Invoke(); EXPECT_THAT(m.GetDequantizedOutput(), diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index d12a3eca1d..b19f46beaa 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1973,19 +1973,6 @@ void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { - T output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - BroadcastDiv(input1_data, input1_dims, input2_data, input2_dims, - output_activation_min, output_activation_max, output_data, - output_dims); -} - inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, int32 input1_offset, const 
uint8* input2_data, const Dims<4>& input2_dims, int32 input2_offset, @@ -2033,21 +2020,6 @@ inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, - int32 input1_offset, const uint8* input2_data, - const Dims<4>& input2_dims, int32 input2_offset, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - BroadcastDiv(input1_data, input1_dims, input1_offset, input2_data, - input2_dims, input2_offset, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_data, output_dims); -} - // TODO(aselle): This is not actually optimized yet. inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, @@ -2121,19 +2093,6 @@ void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { - T output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - BroadcastSub(input1_data, input1_dims, input2_data, input2_dims, - output_activation_min, output_activation_max, output_data, - output_dims); -} - inline void BroadcastSub(int left_shift, const uint8* input1_data, const Dims<4>& input1_dims, int32 input1_offset, int32 input1_multiplier, int input1_shift, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index c7b7687622..847075e207 100644 --- 
a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1249,19 +1249,6 @@ void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { - T output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - BroadcastDiv(input1_data, input1_dims, input2_data, input2_dims, - output_activation_min, output_activation_max, output_data, - output_dims); -} - inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, int32 input1_offset, const uint8* input2_data, const Dims<4>& input2_dims, int32 input2_offset, @@ -1309,21 +1296,6 @@ inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old checked-in code -template -inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, - int32 input1_offset, const uint8* input2_data, - const Dims<4>& input2_dims, int32 input2_offset, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - BroadcastDiv(input1_data, input1_dims, input1_offset, input2_data, - input2_dims, input2_offset, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_data, output_dims); -} - inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, float output_activation_min, float output_activation_max, @@ -1392,19 +1364,6 @@ void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, } } -// legacy, for compatibility with old 
checked-in code -template -void BroadcastSub(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T* output_data, const Dims<4>& output_dims) { - T output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - - BroadcastSub(input1_data, input1_dims, input2_data, input2_dims, - output_activation_min, output_activation_max, output_data, - output_dims); -} - inline void BroadcastSub(int left_shift, const uint8* input1_data, const Dims<4>& input1_dims, int32 input1_offset, int32 input1_multiplier, int input1_shift, diff --git a/tensorflow/contrib/lite/kernels/sub_test.cc b/tensorflow/contrib/lite/kernels/sub_test.cc index b2c6d05f62..1fd0ee2a0e 100644 --- a/tensorflow/contrib/lite/kernels/sub_test.cc +++ b/tensorflow/contrib/lite/kernels/sub_test.cc @@ -125,11 +125,17 @@ TEST(FloatSubOpModel, WithBroadcast) { TEST(QuantizedSubOpModel, QuantizedTestsNoActivation) { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::vector> inputs1 = { - {0.1, 0.2, 0.3, 0.4}, {-0.2, 0.2, 0.4, 0.7}, {-0.01, 0.2, 0.7, 0.3}}; + {0.1, 0.2, 0.3, 0.4}, + {-0.2, 0.2, 0.4, 0.7}, + {-0.01, 0.2, 0.7, 0.3}}; std::vector> inputs2 = { - {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.2}, {0.6, 0.4, -0.18, 0.5}}; + {0.6, 0.4, 0.3, 0.1}, + {0.6, 0.4, 0.5, -0.2}, + {0.6, 0.4, -0.18, 0.5}}; std::vector> results = { - {-0.5, -0.2, 0.0, 0.3}, {-0.8, -0.2, -0.1, 0.9}, {-0.61, -0.2, 0.88, -0.2}}; + {-0.5, -0.2, 0.0, 0.3}, + {-0.8, -0.2, -0.1, 0.9}, + {-0.61, -0.2, 0.88, -0.2}}; for (int i = 0; i < inputs1.size(); ++i) { QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, @@ -179,7 +185,8 @@ TEST(QuantizedSubOpModel, QuantizedVariousInputShapes) { m.QuantizeAndPopulate(m.input2(), {0.1, 0.3, 0.3, 0.5, 1.1, 0.1}); m.Invoke(); EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({-2.1, -0.1, 0.4, 0.3, 0.0, 1.9}, + 
ElementsAreArray(ArrayFloatNear({-2.1, -0.1, 0.4, + 0.3, 0.0, 1.9}, kQuantizedTolerance))) << "With shape number " << i; } @@ -198,7 +205,8 @@ TEST(QuantizedSubOpModel, QuantizedWithBroadcast) { m.QuantizeAndPopulate(m.input2(), {0.7}); m.Invoke(); EXPECT_THAT(m.GetDequantizedOutput(), - ElementsAreArray(ArrayFloatNear({-2.7, -0.5, 0.0, 0.1, 0.4, 1.3}, + ElementsAreArray(ArrayFloatNear({-2.7, -0.5, 0.0, + 0.1, 0.4, 1.3}, kQuantizedTolerance))) << "With shape number " << i; } -- GitLab From 0489bf25930ea0dc4b7d8ffc792b0390bfbc06bc Mon Sep 17 00:00:00 2001 From: Jingwen Date: Tue, 27 Feb 2018 18:30:09 -0500 Subject: [PATCH 0017/1931] Include cstring in logging.cc for use of strrchr() --- tensorflow/core/platform/default/logging.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc index 2b874da198..c6e5777c26 100644 --- a/tensorflow/core/platform/default/logging.cc +++ b/tensorflow/core/platform/default/logging.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include #include #include +#include #endif #include -- GitLab From ef4e8ad826c8946f8ff3e0f7e1b3bb3bec61010c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 21 Feb 2018 15:06:04 +0800 Subject: [PATCH 0018/1931] CLN: extract ApplyAdamBaseOp --- tensorflow/core/kernels/training_ops.cc | 146 +++++++++++++++--- tensorflow/core/kernels/training_ops.h | 13 ++ .../core/kernels/training_ops_gpu.cu.cc | 30 ++++ tensorflow/core/ops/training_ops.cc | 37 +++++ 4 files changed, 202 insertions(+), 24 deletions(-) diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 233aa03c32..7d383d980a 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -328,6 +328,45 @@ struct ApplyAdamSYCL { template struct ApplyAdam : ApplyAdamNonCuda {}; +template +struct ApplyAdaMaxNonCuda { + void operator()(const Device& d, typename TTypes::Flat var, + typename TTypes::Flat m, typename TTypes::Flat v, + typename TTypes::ConstScalar beta1_power, + typename TTypes::ConstScalar beta2_power, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar beta1, + typename TTypes::ConstScalar beta2, + typename TTypes::ConstScalar epsilon, + typename TTypes::ConstFlat grad, bool use_nesterov) { + if (use_nesterov) { + LOG(WARNING) << "AdaMax doesn't support use_nesterov yet, ignore it."; + } + m.device(d) += (grad - m) * (T(1) - beta1()); + // v == u + v.device(d) = (beta2() * v).cwiseMax(grad.abs()); + // var == θ + var.device(d) -= (lr * m) / ((T(1) - beta1_power()) * v); + } +}; + +#ifdef TENSORFLOW_USE_SYCL +template +struct ApplyAdaMaxSYCL { + void operator()(const SYCLDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, typename TTypes::Flat v, + T beta1_power, T beta2_power, T lr, T beta1, T beta2, + T epsilon, typename TTypes::ConstFlat grad) { + m.device(d) += (grad - m) * (T(1) - beta1); + v.device(d) = (beta2 * v).cwiseMax(grad.abs()); 
+ var.device(d) -= (lr * m) / ((T(1) - beta1_power) * v); + } +}; +#endif // TENSORFLOW_USE_SYCL + +template +struct ApplyAdaMax : ApplyAdaMaxNonCuda {}; + template struct ApplyRMSProp { void operator()(const CPUDevice& d, typename TTypes::Flat var, @@ -2477,10 +2516,12 @@ TF_CALL_double(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS #undef REGISTER_KERNELS -template -class ApplyAdamOp : public OpKernel { +template + class Functor> +class ApplyAdamBaseOp : public OpKernel { public: - explicit ApplyAdamOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + explicit ApplyAdamBaseOp(OpKernelConstruction* ctx) : OpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("use_nesterov", &use_nesterov_)); } @@ -2553,11 +2594,11 @@ class ApplyAdamOp : public OpKernel { grad.shape().DebugString())); const Device& device = ctx->template eigen_device(); - functor::ApplyAdam()( - device, var.flat(), m.flat(), v.flat(), - beta1_power.scalar(), beta2_power.scalar(), lr.scalar(), - beta1.scalar(), beta2.scalar(), epsilon.scalar(), - grad.flat(), use_nesterov_); + auto functor = Functor(); + functor(device, var.flat(), m.flat(), v.flat(), + beta1_power.scalar(), beta2_power.scalar(), lr.scalar(), + beta1.scalar(), beta2.scalar(), epsilon.scalar(), + grad.flat(), use_nesterov_); MaybeForwardRefInputToRefOutput(ctx, 0, 0); } @@ -2568,10 +2609,11 @@ class ApplyAdamOp : public OpKernel { }; #ifdef TENSORFLOW_USE_SYCL -template -class ApplyAdamOp : public OpKernel { +template class Functor> +class ApplyAdamBaseOp : public OpKernel { public: - explicit ApplyAdamOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + explicit ApplyAdamBaseOp(OpKernelConstruction* ctx) : OpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); } @@ -2672,9 +2714,10 @@ class ApplyAdamOp : public OpKernel { var.shape().DebugString(), " ", grad.shape().DebugString())); - functor::ApplyAdamSYCL()(device, var.flat(), 
m.flat(), v.flat(), - beta1_power, beta2_power, lr, beta1, beta2, - epsilon, grad.flat()); + auto functor = Functor(); + functor(device, var.flat(), m.flat(), v.flat(), + beta1_power, beta2_power, lr, beta1, beta2, + epsilon, grad.flat()); MaybeForwardRefInputToRefOutput(ctx, 0, 0); } @@ -2684,28 +2727,28 @@ class ApplyAdamOp : public OpKernel { }; #endif // TENSORFLOW_USE_SYCL -#define REGISTER_KERNELS(D, T) \ +#define REGISTER_KERNELS(D, T, F) \ REGISTER_KERNEL_BUILDER( \ Name("ApplyAdam").Device(DEVICE_##D).TypeConstraint("T"), \ - ApplyAdamOp); \ + ApplyAdamBaseOp); \ REGISTER_KERNEL_BUILDER(Name("ResourceApplyAdam") \ .HostMemory("var") \ .HostMemory("m") \ .HostMemory("v") \ .Device(DEVICE_##D) \ .TypeConstraint("T"), \ - ApplyAdamOp); -#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); - + ApplyAdamBaseOp); +#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T, functor::ApplyAdam); TF_CALL_half(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); +#undef REGISTER_CPU_KERNELS #ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T); - +#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T, functor::ApplyAdamSYCL); TF_CALL_float(REGISTER_SYCL_KERNELS); TF_CALL_double(REGISTER_SYCL_KERNELS); +#undef REGISTER_SYCL_KERNELS #endif #if GOOGLE_CUDA @@ -2730,11 +2773,66 @@ DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // namespace functor -REGISTER_KERNELS(GPU, Eigen::half); -REGISTER_KERNELS(GPU, float); -REGISTER_KERNELS(GPU, double); +#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T, functor::ApplyAdam); +REGISTER_GPU_KERNELS(Eigen::half); +REGISTER_GPU_KERNELS(float); +REGISTER_GPU_KERNELS(double); +#undef REGISTER_GPU_KERNELS #endif +#undef REGISTER_KERNELS + +#define REGISTER_KERNELS(D, T, F) \ + REGISTER_KERNEL_BUILDER( \ + Name("ApplyAdaMax").Device(DEVICE_##D).TypeConstraint("T"), \ + ApplyAdamBaseOp); \ + REGISTER_KERNEL_BUILDER(Name("ResourceApplyAdaMax") 
\ + .HostMemory("var") \ + .HostMemory("m") \ + .HostMemory("v") \ + .Device(DEVICE_##D) \ + .TypeConstraint("T"), \ + ApplyAdamBaseOp); +#define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T, functor::ApplyAdaMax); +TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_float(REGISTER_CPU_KERNELS); +TF_CALL_double(REGISTER_CPU_KERNELS); #undef REGISTER_CPU_KERNELS + +#ifdef TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T, functor::ApplyAdaMaxSYCL); +TF_CALL_float(REGISTER_SYCL_KERNELS); +TF_CALL_double(REGISTER_SYCL_KERNELS); +#undef REGISTER_SYCL_KERNELS +#endif + +#if GOOGLE_CUDA +// Forward declarations of the functor specializations for GPU. +namespace functor { +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void ApplyAdaMax::operator()( \ + const GPUDevice& d, typename TTypes::Flat var, \ + typename TTypes::Flat m, typename TTypes::Flat v, \ + typename TTypes::ConstScalar beta1_power, \ + typename TTypes::ConstScalar beta2_power, \ + typename TTypes::ConstScalar lr, \ + typename TTypes::ConstScalar beta1, \ + typename TTypes::ConstScalar beta2, \ + typename TTypes::ConstScalar epsilon, \ + typename TTypes::ConstFlat grad, bool use_nesterov); \ + extern template struct ApplyAdaMax; +DECLARE_GPU_SPEC(Eigen::half); +DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(double); +#undef DECLARE_GPU_SPEC +} // namespace functor + +#define REGISTER_GPU_KERNELS(T) REGISTER_KERNELS(GPU, T, functor::ApplyAdaMax); +REGISTER_GPU_KERNELS(Eigen::half); +REGISTER_GPU_KERNELS(float); +REGISTER_GPU_KERNELS(double); +#undef REGISTER_GPU_KERNELS +#endif #undef REGISTER_KERNELS template diff --git a/tensorflow/core/kernels/training_ops.h b/tensorflow/core/kernels/training_ops.h index 7ee956053a..46a5290210 100644 --- a/tensorflow/core/kernels/training_ops.h +++ b/tensorflow/core/kernels/training_ops.h @@ -139,6 +139,19 @@ struct ApplyAdam { typename TTypes::ConstFlat grad, bool use_nesterov); }; +template +struct ApplyAdaMax { + void operator()(const Device& d, 
typename TTypes::Flat var, + typename TTypes::Flat m, typename TTypes::Flat v, + typename TTypes::ConstScalar beta1_power, + typename TTypes::ConstScalar beta2_power, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar beta1, + typename TTypes::ConstScalar beta2, + typename TTypes::ConstScalar epsilon, + typename TTypes::ConstFlat grad, bool use_nesterov); +}; + template struct ApplyRMSProp { void operator()(const Device& d, typename TTypes::Flat var, diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 0376a3b2c6..1776c108ab 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -142,6 +142,32 @@ struct ApplyAdam { } }; +template +struct ApplyAdaMax { + void operator()(const GPUDevice& d, typename TTypes::Flat var, + typename TTypes::Flat m, typename TTypes::Flat v, + typename TTypes::ConstScalar beta1_power, + typename TTypes::ConstScalar beta2_power, + typename TTypes::ConstScalar lr, + typename TTypes::ConstScalar beta1, + typename TTypes::ConstScalar beta2, + typename TTypes::ConstScalar epsilon, + typename TTypes::ConstFlat grad, bool use_nesterov) { + Eigen::array::Tensor::Index, 1> bcast; + bcast[0] = grad.dimension(0); + Eigen::Sizes<1> single; + const auto one = static_cast(1.0); + m.device(d) = + m + (beta1.constant(one) - beta1).reshape(single).broadcast(bcast) * + (grad - m); + v.device(d) = + (beta2.reshape(single).broadcast(bcast) * v).cwiseMax(grad.abs()); + var.device(d) -= + (lr * m) / ((beta1_power.constant(one) - + beta1_power).reshape(single).broadcast(bcast) * v); + } +}; + template struct ApplyRMSProp { void operator()(const GPUDevice& d, typename TTypes::Flat var, @@ -278,6 +304,10 @@ template struct functor::ApplyAdam; template struct functor::ApplyAdam; template struct functor::ApplyAdam; +template struct functor::ApplyAdaMax; +template struct functor::ApplyAdaMax; +template struct functor::ApplyAdaMax; + 
template struct functor::ApplyRMSProp; template struct functor::ApplyRMSProp; template struct functor::ApplyRMSProp; diff --git a/tensorflow/core/ops/training_ops.cc b/tensorflow/core/ops/training_ops.cc index 6ce9595fb6..6f107db3ea 100644 --- a/tensorflow/core/ops/training_ops.cc +++ b/tensorflow/core/ops/training_ops.cc @@ -737,6 +737,43 @@ REGISTER_OP("ResourceApplyAdam") return ApplyAdamShapeFn(c, false /* sparse */); }); +REGISTER_OP("ApplyAdaMax") + .Input("var: Ref(T)") + .Input("m: Ref(T)") + .Input("v: Ref(T)") + .Input("beta1_power: T") + .Input("beta2_power: T") + .Input("lr: T") + .Input("beta1: T") + .Input("beta2: T") + .Input("epsilon: T") + .Input("grad: T") + .Output("out: Ref(T)") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .Attr("use_nesterov: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyAdamShapeFn(c, false /* sparse */); + }); + +REGISTER_OP("ResourceApplyAdaMax") + .Input("var: resource") + .Input("m: resource") + .Input("v: resource") + .Input("beta1_power: T") + .Input("beta2_power: T") + .Input("lr: T") + .Input("beta1: T") + .Input("beta2: T") + .Input("epsilon: T") + .Input("grad: T") + .Attr("T: numbertype") + .Attr("use_locking: bool = false") + .Attr("use_nesterov: bool = false") + .SetShapeFn([](InferenceContext* c) { + return ApplyAdamShapeFn(c, false /* sparse */); + }); + static Status ApplyRMSPropShapeFn(InferenceContext* c, bool sparse) { ShapeHandle unused; ShapeHandle s = ShapeOrHandleShape(c, 0); // var -- GitLab From 4d31dac8111b963ed427969c71c6957c929d3e5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 21 Feb 2018 20:29:46 +0800 Subject: [PATCH 0019/1931] ENH: add AdaMaxOptimizer in python side --- tensorflow/contrib/opt/BUILD | 20 +++ tensorflow/contrib/opt/__init__.py | 2 + .../contrib/opt/python/training/adamax.py | 72 ++++++++++ .../opt/python/training/adamax_test.py | 124 ++++++++++++++++++ 
tensorflow/core/kernels/training_ops.cc | 2 +- 5 files changed, 219 insertions(+), 1 deletion(-) create mode 100644 tensorflow/contrib/opt/python/training/adamax.py create mode 100644 tensorflow/contrib/opt/python/training/adamax_test.py diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 86ceda71b7..a86d150f7a 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -14,6 +14,7 @@ py_library( name = "opt_py", srcs = [ "__init__.py", + "python/training/adamax.py", "python/training/addsign.py", "python/training/drop_stale_gradient_optimizer.py", "python/training/elastic_average_optimizer.py", @@ -48,6 +49,25 @@ py_library( ], ) +py_test( + name = "adamax_test", + srcs = ["python/training/adamax_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_oss", # b/73507407 + "notsan", # b/31055119 + ], + deps = [ + ":opt_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + "//tensorflow/python:training", + "//third_party/py/numpy", + ], +) + py_test( name = "external_optimizer_test", srcs = ["python/training/external_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 6c1bb1adc0..4c13c8e247 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=wildcard-import +from tensorflow.contrib.opt.python.training.adamax import * from tensorflow.contrib.opt.python.training.addsign import * from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import * from tensorflow.contrib.opt.python.training.external_optimizer import * @@ -36,6 +37,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ + 'AdaMaxOptimizer', 'PowerSignOptimizer', 'AddSignOptimizer', 
'DelayCompensatedGradientDescentOptimizer', diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py new file mode 100644 index 0000000000..4e0c541d3a --- /dev/null +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -0,0 +1,72 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""AdaMax for TensorFlow.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.training import optimizer +from tensorflow.python.training import adam +from tensorflow.python.training import training_ops +from tensorflow.python.util.tf_export import tf_export + + +@tf_export("train.AdaMaxOptimizer") +class AdaMaxOptimizer(adam.AdamOptimizer): + """Optimizer that implements the AdaMax algorithm. + + See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) + ([pdf](http://arxiv.org/pdf/1412.6980.pdf)). 
+ """ + + def _apply_dense(self, grad, var): + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + beta1_power, beta2_power = self._get_beta_accumulators() + return training_ops.apply_ada_max( + var, m, v, + math_ops.cast(beta1_power, var.dtype.base_dtype), + math_ops.cast(beta2_power, var.dtype.base_dtype), + math_ops.cast(self._lr_t, var.dtype.base_dtype), + math_ops.cast(self._beta1_t, var.dtype.base_dtype), + math_ops.cast(self._beta2_t, var.dtype.base_dtype), + math_ops.cast(self._epsilon_t, var.dtype.base_dtype), + grad, use_locking=self._use_locking).op + + def _resource_apply_dense(self, grad, var): + m = self.get_slot(var, "m") + v = self.get_slot(var, "v") + beta1_power, beta2_power = self._get_beta_accumulators() + return training_ops.resource_apply_ada_max( + var.handle, m.handle, v.handle, + math_ops.cast(beta1_power, grad.dtype.base_dtype), + math_ops.cast(beta2_power, grad.dtype.base_dtype), + math_ops.cast(self._lr_t, grad.dtype.base_dtype), + math_ops.cast(self._beta1_t, grad.dtype.base_dtype), + math_ops.cast(self._beta2_t, grad.dtype.base_dtype), + math_ops.cast(self._epsilon_t, grad.dtype.base_dtype), + grad, use_locking=self._use_locking) + + def _apply_sparse_shared(self, grad, var, indices, scatter_add): + raise NotImplementedError() + + def _apply_sparse(self, grad, var): + raise NotImplementedError() diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py new file mode 100644 index 0000000000..a1499118dd --- /dev/null +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -0,0 +1,124 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for AdaMax.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.opt.python.training import adamax +from tensorflow.python.client import session +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test + + +def adamax_update_numpy(param, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): + m_t = beta1 * m + (1 - beta1) * g_t + v_t = np.maximum(beta2 * v, np.abs(g_t)) + param_t = param - (alpha / (1 - beta1**t)) * m_t / v_t + return param_t, m_t, v_t + + +class AdaMaxOptimizerTest(test.TestCase): + + def doTestBasic(self, use_resource=False): + for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): + with self.test_session(graph=ops.Graph()): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable( + var0_np, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + var1_np, name="var1_%d" % i) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + opt = adamax.AdaMaxOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + opt_variables = opt.variables() + beta1_power, beta2_power = opt._get_beta_accumulators() + self.assertTrue(beta1_power is not None) + self.assertTrue(beta2_power is not None) + self.assertIn(beta1_power, opt_variables) + self.assertIn(beta2_power, opt_variables) + + with ops.Graph().as_default(): + # Shouldn't return non-slot variables from other graphs. 
+ self.assertEqual(0, len(opt.variables())) + + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Run 3 steps of Adam + for t in range(1, 4): + if context.in_graph_mode(): + self.evaluate(update) + elif t > 1: + opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + + self.assertAllCloseAccordingToType(0.9**(t + 1), + self.evaluate(beta1_power)) + self.assertAllCloseAccordingToType(0.999**(t + 1), + self.evaluate(beta2_power)) + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) + if use_resource: + self.assertEqual("var0_%d/Adam:0" % (i,), + opt.get_slot(var=var0, name="m").name) + + def testBasic(self): + with self.test_session(): + self.doTestBasic(use_resource=False) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 7d383d980a..b3b53d9ee0 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -346,7 +346,7 @@ struct ApplyAdaMaxNonCuda { // v == u v.device(d) = (beta2() * v).cwiseMax(grad.abs()); // var == θ - var.device(d) -= (lr * m) / ((T(1) - beta1_power()) * v); + var.device(d) -= (lr() * m) / ((T(1) - beta1_power()) * v); } }; -- GitLab From ba258d530f1af5fbcc8c1b72637dc7b2177a48c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Fri, 2 Mar 2018 19:33:30 +0800 Subject: [PATCH 0020/1931] ENH: support sparse grad --- .../contrib/opt/python/training/adamax.py | 51 
+++++++++++++++++-- .../opt/python/training/adamax_test.py | 2 +- tensorflow/core/kernels/training_ops.cc | 4 +- .../core/kernels/training_ops_gpu.cu.cc | 5 +- 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index 4e0c541d3a..137fce769f 100644 --- a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -18,12 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.eager import context from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.training import optimizer +from tensorflow.python.ops import state_ops from tensorflow.python.training import adam from tensorflow.python.training import training_ops from tensorflow.python.util.tf_export import tf_export @@ -65,8 +65,49 @@ class AdaMaxOptimizer(adam.AdamOptimizer): math_ops.cast(self._epsilon_t, grad.dtype.base_dtype), grad, use_locking=self._use_locking) - def _apply_sparse_shared(self, grad, var, indices, scatter_add): - raise NotImplementedError() + def _apply_sparse_shared(self, grad, var, indices, + scatter_add, scatter_update): + beta1_power, beta2_power = self._get_beta_accumulators() + beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype) + beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype) + lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype) + beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype) + beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype) + epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype) + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, "m") + m_slice = 
array_ops.gather(m, indices) + m_t_slice = m_slice * beta1_t + grad * (1 - beta1_t) + with ops.control_dependencies([m_t_slice]): + m_t = scatter_update(m, indices, m_t_slice) + # u_t = max(beta2 * u, abs(g_t)) + v = self.get_slot(var, "v") + v_slice = array_ops.gather(v, indices) + v_t_slice = math_ops.maximum(v_slice * beta2_t, math_ops.abs(grad)) + with ops.control_dependencies([v_t_slice]): + v_t = scatter_update(v, indices, v_t_slice) + # theta_t = theta - lr / (1 - beta1^t) * m_t / u_t + var_slice = -lr_t / (1 - beta1_power) * (m_t_slice / + (v_t_slice + epsilon_t)) + with ops.control_dependencies([var_slice]): + var_update = scatter_add(var, indices, var_slice) + return control_flow_ops.group(*[var_update, m_t, v_t]) def _apply_sparse(self, grad, var): - raise NotImplementedError() + return self._apply_sparse_shared( + grad.values, var, grad.indices, + lambda x, i, v: state_ops.scatter_add( # pylint: disable=g-long-lambda + x, i, v, use_locking=self._use_locking), + lambda x, i, v: state_ops.scatter_update( # pylint: disable=g-long-lambda + x, i, v, use_locking=self._use_locking)) + + def _resource_scatter_update(self, x, i, v): + with ops.control_dependencies( + [resource_variable_ops.resource_scatter_update( + x.handle, i, v)]): + return x.value() + + def _resource_apply_sparse(self, grad, var, indices): + return self._apply_sparse_shared( + grad, var, indices, + self._resource_scatter_add, self._resource_scatter_update) diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index a1499118dd..0e2ba0987a 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -45,7 +45,7 @@ def adamax_update_numpy(param, epsilon=1e-8): m_t = beta1 * m + (1 - beta1) * g_t v_t = np.maximum(beta2 * v, np.abs(g_t)) - param_t = param - (alpha / (1 - beta1**t)) * m_t / v_t + param_t = param - (alpha / (1 - beta1**t)) * m_t / (v_t + epsilon) 
return param_t, m_t, v_t diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index b3b53d9ee0..0387e3011e 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -346,7 +346,7 @@ struct ApplyAdaMaxNonCuda { // v == u v.device(d) = (beta2() * v).cwiseMax(grad.abs()); // var == θ - var.device(d) -= (lr() * m) / ((T(1) - beta1_power()) * v); + var.device(d) -= lr() / (T(1) - beta1_power()) * (m / (v + epsilon())); } }; @@ -359,7 +359,7 @@ struct ApplyAdaMaxSYCL { T epsilon, typename TTypes::ConstFlat grad) { m.device(d) += (grad - m) * (T(1) - beta1); v.device(d) = (beta2 * v).cwiseMax(grad.abs()); - var.device(d) -= (lr * m) / ((T(1) - beta1_power) * v); + var.device(d) -= lr / (T(1) - beta1_power) * (m / (v + epsilon)); } }; #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/training_ops_gpu.cu.cc b/tensorflow/core/kernels/training_ops_gpu.cu.cc index 1776c108ab..54c06b130c 100644 --- a/tensorflow/core/kernels/training_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/training_ops_gpu.cu.cc @@ -163,8 +163,9 @@ struct ApplyAdaMax { v.device(d) = (beta2.reshape(single).broadcast(bcast) * v).cwiseMax(grad.abs()); var.device(d) -= - (lr * m) / ((beta1_power.constant(one) - - beta1_power).reshape(single).broadcast(bcast) * v); + lr / (beta1_power.constant(one) - + beta1_power).reshape(single).broadcast(bcast) * + (m / (v + epsilon)); } }; -- GitLab From f6f5a6019970bb8d667819da7d6316a8088a0b78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 3 Mar 2018 10:02:43 +0800 Subject: [PATCH 0021/1931] DOC: add docment --- .../contrib/opt/python/training/adamax.py | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index 137fce769f..ddae06bec7 100644 --- 
a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -29,7 +29,6 @@ from tensorflow.python.training import training_ops from tensorflow.python.util.tf_export import tf_export -@tf_export("train.AdaMaxOptimizer") class AdaMaxOptimizer(adam.AdamOptimizer): """Optimizer that implements the AdaMax algorithm. @@ -37,6 +36,56 @@ class AdaMaxOptimizer(adam.AdamOptimizer): ([pdf](http://arxiv.org/pdf/1412.6980.pdf)). """ + def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, + use_locking=False, name="AdaMax"): + """Construct a new AdaMax optimizer. + + Initialization: + + ``` + m_0 <- 0 (Initialize initial 1st moment vector) + v_0 <- 0 (Initialize the exponentially weighted infinity norm) + t <- 0 (Initialize timestep) + ``` + + The update rule for `variable` with gradient `g` uses an optimization + described at the end of section7.1 of the paper: + + ``` + t <- t + 1 + lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) + + m_t <- beta1 * m_{t-1} + (1 - beta1) * g + v_t <- max(beta2 * v_{t-1}, abs(g)) + variable <- variable - lr_t / (1 - beta1^t) * m_t / (v_t + epsilon) + ``` + + Similar to AdamOptimizer, the epsilon is added for numerical stability + (especially to get rid of division by zero when v_t = 0). + + Contrast to AdamOptimizer, the sparse implementation of this algorithm + (used when the gradient is an IndexedSlices object, typically because of + `tf.gather` or an embedding lookup in the forward pass) only updates + variable slices and corresponding `m_t`, `v_t` terms when that part of + the variable was used in the forward pass. This means that the sparse + behavior is contrast to the dense behavior (similar to some momentum + implementations which ignore momentum unless a variable slice was actually + used). + + Args: + learning_rate: A Tensor or a floating point value. The learning rate. + beta1: A float value or a constant float tensor. 
+ The exponential decay rate for the 1st moment estimates. + beta2: A float value or a constant float tensor. + The exponential decay rate for the exponentially weighted infinity norm. + epsilon: A small constant for numerical stability. + use_locking: If True use locks for update operations. + name: Optional name for the operations created when applying gradients. + Defaults to "AdaMax". + """ + super(AdaMaxOptimizer, self).__init__(learning_rate, beta1, beta2, + epsilon, use_locking, name) + def _apply_dense(self, grad, var): m = self.get_slot(var, "m") v = self.get_slot(var, "v") -- GitLab From f750e21a63c8836b9e7243ce786af2de3f65cc3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 3 Mar 2018 12:31:54 +0800 Subject: [PATCH 0022/1931] TST: add more tests --- .../contrib/opt/python/training/adamax.py | 2 +- .../opt/python/training/adamax_test.py | 243 +++++++++++++++++- 2 files changed, 233 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index ddae06bec7..36d49d4cbf 100644 --- a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -159,4 +159,4 @@ class AdaMaxOptimizer(adam.AdamOptimizer): def _resource_apply_sparse(self, grad, var, indices): return self._apply_sparse_shared( grad, var, indices, - self._resource_scatter_add, self._resource_scatter_update) + self._resource_scatter_add, self._resource_scatter_update) diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index 0e2ba0987a..e91e5cb96a 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -35,22 +35,142 @@ from tensorflow.python.platform import test def adamax_update_numpy(param, - g_t, - t, - m, - v, - alpha=0.001, - beta1=0.9, - beta2=0.999, - epsilon=1e-8): 
+ g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): m_t = beta1 * m + (1 - beta1) * g_t v_t = np.maximum(beta2 * v, np.abs(g_t)) - param_t = param - (alpha / (1 - beta1**t)) * m_t / (v_t + epsilon) + param_t = param - (alpha / (1 - beta1**t)) * (m_t / (v_t + epsilon)) + return param_t, m_t, v_t + + +def adamax_sparse_update_numpy(param, + indices, + g_t, + t, + m, + v, + alpha=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-8): + m_t, v_t, param_t = np.copy(m), np.copy(v), np.copy(param) + m_t_slice = beta1 * m[indices] + (1 - beta1) * g_t + v_t_slice = np.maximum(beta2 * v[indices], np.abs(g_t)) + param_t_slice = param[indices] - ((alpha / (1 - beta1**t)) * + (m_t_slice / (v_t_slice + epsilon))) + m_t[indices] = m_t_slice + v_t[indices] = v_t_slice + param_t[indices] = param_t_slice return param_t, m_t, v_t class AdaMaxOptimizerTest(test.TestCase): + def doTestSparse(self, use_resource=False): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. 
+ zero_slots = lambda: np.zeros((3), dtype=dtype.as_numpy_dtype) + m0, v0, m1, v1 = zero_slots(), zero_slots(), zero_slots(), zero_slots() + var0_np = np.array([1.0, 2.0, 3.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([4.0, 5.0, 6.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + if use_resource: + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + else: + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0_np_indices = np.array([0, 1], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np), + constant_op.constant(grads0_np_indices), constant_op.constant([2])) + grads1_np_indices = np.array([2, 1], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np), + constant_op.constant(grads1_np_indices), constant_op.constant([2])) + opt = adamax.AdaMaxOptimizer() + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0, 3.0], var0.eval()) + self.assertAllClose([4.0, 5.0, 6.0], var1.eval()) + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) + update.run() + + var0_np, m0, v0 = adamax_sparse_update_numpy( + var0_np, grads0_np_indices, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_sparse_update_numpy( + var1_np, grads1_np_indices, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSparse(self): + self.doTestSparse(use_resource=False) + + def 
testResourceSparse(self): + self.doTestSparse(use_resource=True) + + def testSparseDevicePlacement(self): + for index_dtype in [dtypes.int32, dtypes.int64]: + with self.test_session(force_gpu=test.is_gpu_available()): + # If a GPU is available, tests that all optimizer ops can be placed on + # it (i.e. they have GPU kernels). + var = variables.Variable([[1.0], [2.0]]) + indices = constant_op.constant([0, 1], dtype=index_dtype) + gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices)) + optimizer = adamax.AdaMaxOptimizer(3.0) + minimize_op = optimizer.minimize(gathered_sum) + variables.global_variables_initializer().run() + minimize_op.run() + + def testSparseRepeatedIndices(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + repeated_index_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + aggregated_update_var = variables.Variable( + [[1.0], [2.0]], dtype=dtype) + grad_repeated_index = ops.IndexedSlices( + constant_op.constant( + [0.1, 0.1], shape=[2, 1], dtype=dtype), + constant_op.constant([1, 1]), + constant_op.constant([2, 1])) + grad_aggregated = ops.IndexedSlices( + constant_op.constant( + [0.2], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), + constant_op.constant([2, 1])) + repeated_update = adamax.AdaMaxOptimizer().apply_gradients( + [(grad_repeated_index, repeated_index_update_var)]) + aggregated_update = adamax.AdaMaxOptimizer().apply_gradients( + [(grad_aggregated, aggregated_update_var)]) + variables.global_variables_initializer().run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + def doTestBasic(self, use_resource=False): for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]): with self.test_session(graph=ops.Graph()): @@ -93,7 +213,7 @@ class 
AdaMaxOptimizerTest(test.TestCase): beta1_power, beta2_power = opt._get_beta_accumulators() - # Run 3 steps of Adam + # Run 3 steps of AdaMax for t in range(1, 4): if context.in_graph_mode(): self.evaluate(update) @@ -112,13 +232,114 @@ class AdaMaxOptimizerTest(test.TestCase): self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0)) self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1)) if use_resource: - self.assertEqual("var0_%d/Adam:0" % (i,), + self.assertEqual("var0_%d/AdaMax:0" % (i,), opt.get_slot(var=var0, name="m").name) def testBasic(self): with self.test_session(): self.doTestBasic(use_resource=False) + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTestBasic(use_resource=True) + + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adamax.AdaMaxOptimizer(constant_op.constant(0.001)) + update = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Run 3 steps of AdaMax + for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) + update.run() + + var0_np, m0, v0 = 
adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testSharing(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.test_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = adamax.AdaMaxOptimizer() + update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + variables.global_variables_initializer().run() + + beta1_power, beta2_power = opt._get_beta_accumulators() + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + # Run 3 steps of intertwined AdaMax1 and AdaMax2. 
+ for t in range(1, 4): + self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval()) + self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval()) + if t % 2 == 0: + update1.run() + else: + update2.run() + + var0_np, m0, v0 = adamax_update_numpy(var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adamax_update_numpy(var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, var0.eval()) + self.assertAllCloseAccordingToType(var1_np, var1.eval()) + + def testTwoSessions(self): + optimizer = adamax.AdaMaxOptimizer() + g = ops.Graph() + with g.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + optimizer.apply_gradients([(grads0, var0)]) + + gg = ops.Graph() + with gg.as_default(): + with session.Session(): + var0 = variables.Variable(np.array([1.0, 2.0]), name="v0") + grads0 = constant_op.constant(np.array([0.1, 0.1])) + + # If the optimizer saves any state not keyed by graph the following line + # fails. 
+ optimizer.apply_gradients([(grads0, var0)]) + if __name__ == "__main__": test.main() -- GitLab From 8b5e4ad404ba16919ad4f17a763ee5383d61a400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 3 Mar 2018 17:39:56 +0800 Subject: [PATCH 0023/1931] DOC: add apidef --- .../contrib/opt/python/training/adamax.py | 3 +- .../base_api/api_def_ApplyAdaMax.pbtxt | 89 +++++++++++++++++++ .../api_def_ResourceApplyAdaMax.pbtxt | 83 +++++++++++++++++ 3 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ResourceApplyAdaMax.pbtxt diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index 36d49d4cbf..fe5522a170 100644 --- a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -53,11 +53,10 @@ class AdaMaxOptimizer(adam.AdamOptimizer): ``` t <- t + 1 - lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t) m_t <- beta1 * m_{t-1} + (1 - beta1) * g v_t <- max(beta2 * v_{t-1}, abs(g)) - variable <- variable - lr_t / (1 - beta1^t) * m_t / (v_t + epsilon) + variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon) ``` Similar to AdamOptimizer, the epsilon is added for numerical stability diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt new file mode 100644 index 0000000000..106c30ca83 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ApplyAdaMax.pbtxt @@ -0,0 +1,89 @@ +op { + graph_op_name: "ApplyAdaMax" + in_arg { + name: "var" + description: < Date: Mon, 5 Mar 2018 17:41:00 +0000 Subject: [PATCH 0024/1931] Update the documentation of `softmax_cross_entropy` This fix updates the documentation of `softmax_cross_entropy`, and removed the shape restrictions of `onehot_labels` and 
`logits`. They only need to be of the same shape, not necessarily `[batch_size, num_classes]`. This fix fixes 16263. Signed-off-by: Yong Tang --- tensorflow/python/ops/losses/losses_impl.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 7386976e93..04c13cb6c6 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -710,11 +710,16 @@ def softmax_cross_entropy( new_onehot_labels = onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes + Note that `onehot_labels` and `logits` must have the same shape, + e.g. `[batch_size, num_classes]`. The shape of `weights` must be + broadcastable to loss, whose shape is decided by the shape of `logits`. + In case the shape of `logits` is `[batch_size, num_classes]`, loss is + a `Tensor` of shape `[batch_size]`. + Args: - onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels. - logits: `[batch_size, num_classes]` logits outputs of the network . - weights: Optional `Tensor` whose rank is either 0, or rank 1 and is - broadcastable to the loss which is a `Tensor` of shape `[batch_size]`. + onehot_labels: One-hot-encoded labels. + logits: Logits outputs of the network. + weights: Optional `Tensor` that is broadcastable to loss. label_smoothing: If greater than 0 then smooth the labels. scope: the scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. -- GitLab From 82e34cd19f554509113d438ca98ad76e42fdf4e9 Mon Sep 17 00:00:00 2001 From: Hovhannes Harutyunyan Date: Wed, 7 Mar 2018 09:14:53 +0400 Subject: [PATCH 0025/1931] Remove quantized version of Div till fixing it.
--- .../internal/optimized/optimized_ops.h | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index b19f46beaa..9c181fddad 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -1973,53 +1973,6 @@ void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, } } -inline void BroadcastDiv(const uint8* input1_data, const Dims<4>& input1_dims, - int32 input1_offset, const uint8* input2_data, - const Dims<4>& input2_dims, int32 input2_offset, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - gemmlowp::ScopedProfilingLabel label("BroadcastDiv/8bit"); - - NdArrayDesc<4> desc1; - NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_dims, input2_dims, &desc1, &desc2); - - // In Tensorflow, the dimensions are canonically named (batch_number, row, - // col, channel), with extents (batches, height, width, depth), with the - // trailing dimension changing most rapidly (channels has the smallest stride, - // typically 1 element). - // - // In generated C code, we store arrays with the dimensions reversed. The - // first dimension has smallest stride. - // - // We name our variables by their Tensorflow convention, but generate C code - // nesting loops such that the innermost loop has the smallest stride for the - // best cache behavior. 
- for (int b = 0; b < ArraySize(output_dims, 3); ++b) { - for (int y = 0; y < ArraySize(output_dims, 2); ++y) { - for (int x = 0; x < ArraySize(output_dims, 1); ++x) { - for (int c = 0; c < ArraySize(output_dims, 0); ++c) { - const int32 input1_val = - input1_offset + input1_data[SubscriptToIndex(desc1, c, x, y, b)]; - const int32 input2_val = - input2_offset + input2_data[SubscriptToIndex(desc2, c, x, y, b)]; - const int32 unclamped_result = - output_offset + - MultiplyByQuantizedMultiplierSmallerThanOne( - input1_val / input2_val, output_multiplier, output_shift); - const int32 clamped_output = - std::min(output_activation_max, - std::max(output_activation_min, unclamped_result)); - output_data[Offset(output_dims, c, x, y, b)] = - static_cast(clamped_output); - } - } - } - } -} - // TODO(aselle): This is not actually optimized yet. inline void Sub(const float* input1_data, const Dims<4>& input1_dims, const float* input2_data, const Dims<4>& input2_dims, -- GitLab From f82d009d878dc675a307e69f89ba9f4dfdcd6c71 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Wed, 7 Mar 2018 21:58:39 +0800 Subject: [PATCH 0026/1931] Fix broken link of typical distributed configuration in graphs.md --- tensorflow/docs_src/programmers_guide/graphs.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index e69b717432..ca74b17542 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -210,9 +210,8 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. 
result = tf.matmul(weights, img) ``` -If you are deploying TensorFlow in a @{$deploy/distributed$typical distributed configuration}, -you might specify the job name and task ID to place variables on -a task in the parameter server job (`"/job:ps"`), and the other operations on + +If you are deploying TensorFlow in a typical @{$deploy/distributed} configuration, you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): ```python -- GitLab From 04b6127510793b4c5aaa540b60b68ffdf3fd48ce Mon Sep 17 00:00:00 2001 From: imsheridan Date: Wed, 7 Mar 2018 22:23:50 +0800 Subject: [PATCH 0027/1931] revert the minor space nit --- tensorflow/docs_src/programmers_guide/graphs.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index ca74b17542..3b5e3e5a9a 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -210,8 +210,9 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. 
result = tf.matmul(weights, img) ``` - -If you are deploying TensorFlow in a typical @{$deploy/distributed} configuration, you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on +If you are deploying TensorFlow in a typical @{$deploy/distributed} configuration, +you might specify the job name and task ID to place variables on +a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): ```python -- GitLab From 2548a3d2cf035a229d35ab6257bee511aa3a8e23 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Thu, 8 Mar 2018 00:15:22 +0800 Subject: [PATCH 0028/1931] fix some typo --- tensorflow/docs_src/programmers_guide/graphs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index 3b5e3e5a9a..f28660d44a 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -505,10 +505,10 @@ multiple graphs in the same process. As noted above, TensorFlow provides a "default graph" that is implicitly passed to all API functions in the same context. For many applications, a single graph is sufficient. However, TensorFlow also provides methods for manipulating -the default graph, which can be useful in more advanced used cases. For example: +the default graph, which can be useful in more advanced use cases. For example: * A @{tf.Graph} defines the namespace for @{tf.Operation} objects: each - operation in a single graph must have a unique name. TensorFlow will + operation in a single graph must have an unique name. TensorFlow will "uniquify" the names of operations by appending `"_1"`, `"_2"`, and so on to their names if the requested name is already taken. 
Using multiple explicitly created graphs gives you more control over what name is given to each -- GitLab From 955f41c5f2240495a086b503e54eac6928876aca Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 7 Mar 2018 14:04:26 -0800 Subject: [PATCH 0029/1931] Cleanup `astor` output to match `codegen` output. The default `astor` output messes up the function signature docs for many docs without a bit of cleanup. With this change the only differences I see are parens around lambdas and math expressions in default arguments. --- tensorflow/tools/docs/parser.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 1798378d55..0fcd0abc4a 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -650,6 +650,9 @@ def _remove_first_line_indent(string): return '\n'.join([line[indent:] for line in string.split('\n')]) +PAREN_NUMBER_RE = re.compile("^\(([0-9.e-]+)\)") + + def _generate_signature(func, reverse_index): """Given a function, returns a list of strings representing its args. @@ -705,7 +708,11 @@ def _generate_signature(func, reverse_index): if id(default) in reverse_index: default_text = reverse_index[id(default)] elif ast_default is not None: - default_text = astor.to_source(ast_default) + default_text = ( + astor.to_source(ast_default).rstrip('\n').replace('\t','\\t') + .replace('\n','\\n').replace('"""',"'")) + default_text = PAREN_NUMBER_RE.sub('\\1',default_text) + if default_text != repr(default): # This may be an internal name. If so, handle the ones we know about. # TODO(wicke): This should be replaced with a lookup in the index. 
-- GitLab From c22d11f4fcc2801d0a5de98a84461e03e1bcb674 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Wed, 7 Mar 2018 14:14:08 -0800 Subject: [PATCH 0030/1931] add back docs --- tensorflow/docs_src/community/documentation.md | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index 8d55148e48..f7b7ba14e5 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -148,7 +148,19 @@ viewing. Do not include url parameters in the source code URL. Before building the documentation, you must first set up your environment by doing the following: -1. If bazel is not installed on your machine, install it now. If you are on +1. If pip isn't installed on your machine, install it now by issuing the +following command: + + $ sudo easy_install pip + +2. Use pip to install mock and pandas by issuing the following + command (Note: If you are using + a [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage your + dependencies, you may not want to use sudo for these installations): + + $ sudo pip install mock pandas + +3. If bazel is not installed on your machine, install it now. If you are on Linux, install bazel by issuing the following command: $ sudo apt-get install bazel # Linux @@ -156,10 +168,10 @@ doing the following: If you are on Mac OS, find bazel installation instructions on [this page](https://bazel.build/versions/master/docs/install.html#mac-os-x). -2. Change directory to the top-level `tensorflow` directory of the TensorFlow +4. Change directory to the top-level `tensorflow` directory of the TensorFlow source code. -3. Run the `configure` script and answer its prompts appropriately for your +5. Run the `configure` script and answer its prompts appropriately for your system. 
$ ./configure -- GitLab From cbb517551964879dcb6eac2b00bf74db6c827975 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Wed, 7 Mar 2018 14:54:24 -0800 Subject: [PATCH 0031/1931] Revert "add back docs" This reverts commit c22d11f4fcc2801d0a5de98a84461e03e1bcb674. --- tensorflow/docs_src/community/documentation.md | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index f7b7ba14e5..8d55148e48 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -148,19 +148,7 @@ viewing. Do not include url parameters in the source code URL. Before building the documentation, you must first set up your environment by doing the following: -1. If pip isn't installed on your machine, install it now by issuing the -following command: - - $ sudo easy_install pip - -2. Use pip to install mock and pandas by issuing the following - command (Note: If you are using - a [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage your - dependencies, you may not want to use sudo for these installations): - - $ sudo pip install mock pandas - -3. If bazel is not installed on your machine, install it now. If you are on +1. If bazel is not installed on your machine, install it now. If you are on Linux, install bazel by issuing the following command: $ sudo apt-get install bazel # Linux @@ -168,10 +156,10 @@ following command: If you are on Mac OS, find bazel installation instructions on [this page](https://bazel.build/versions/master/docs/install.html#mac-os-x). -4. Change directory to the top-level `tensorflow` directory of the TensorFlow +2. Change directory to the top-level `tensorflow` directory of the TensorFlow source code. -5. Run the `configure` script and answer its prompts appropriately for your +3. Run the `configure` script and answer its prompts appropriately for your system. 
$ ./configure -- GitLab From d34eaf348848fe153a5fd245aa75c2ca32973b36 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Wed, 7 Mar 2018 21:53:25 -0800 Subject: [PATCH 0032/1931] fix encoding and lint --- tensorflow/tools/docs/build_docs_test.py | 1 - tensorflow/tools/docs/generate_lib.py | 13 ++++++------- tensorflow/tools/docs/parser.py | 6 +++--- tensorflow/tools/docs/py_guide_parser.py | 2 +- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/tensorflow/tools/docs/build_docs_test.py b/tensorflow/tools/docs/build_docs_test.py index 2e8f634e7c..0cbf8b478f 100644 --- a/tensorflow/tools/docs/build_docs_test.py +++ b/tensorflow/tools/docs/build_docs_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import os -import sys import textwrap import tensorflow as tf diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index 635408d87f..a7ab0fa538 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -21,7 +21,6 @@ from __future__ import print_function import argparse import fnmatch import os -import sys import six @@ -134,8 +133,8 @@ def write_docs(output_dir, parser_config, yaml_toc, root_title='TensorFlow'): try: if not os.path.exists(directory): os.makedirs(directory) - with open(path, 'w') as f: - f.write(pretty_docs.build_md_page(page_info)) + with open(path, 'wb') as f: + f.write(pretty_docs.build_md_page(page_info).encode('utf-8')) except OSError as e: print('Cannot write documentation for %s to %s: %s' % (full_name, directory, e)) @@ -434,19 +433,19 @@ def _other_docs(src_dir, output_dir, reference_resolver, file_pattern='*.md'): full_out_path = os.path.join(output_dir, suffix) if not fnmatch.fnmatch(base_name, file_pattern): print('Copying un-matched file %s...' 
% suffix) - open(full_out_path, 'w').write(open(full_in_path).read()) + open(full_out_path, 'wb').write(open(full_in_path, 'rb').read()) continue if dirpath.endswith('/api_guides/python'): print('Processing Python guide %s...' % base_name) content = tag_updater.process(full_in_path) else: print('Processing doc %s...' % suffix) - content = open(full_in_path).read() + content = open(full_in_path, 'rb').read().decode('utf-8') content = reference_resolver.replace_references(content, relative_path_to_root) - with open(full_out_path, 'w') as f: - f.write(content) + with open(full_out_path, 'wb') as f: + f.write(content.encode('utf-8')) print('Done.') diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 0fcd0abc4a..dd0351b4c6 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -709,9 +709,9 @@ def _generate_signature(func, reverse_index): default_text = reverse_index[id(default)] elif ast_default is not None: default_text = ( - astor.to_source(ast_default).rstrip('\n').replace('\t','\\t') - .replace('\n','\\n').replace('"""',"'")) - default_text = PAREN_NUMBER_RE.sub('\\1',default_text) + astor.to_source(ast_default).rstrip('\n').replace('\t', '\\t') + .replace('\n', '\\n').replace('"""', "'")) + default_text = PAREN_NUMBER_RE.sub('\\1', default_text) if default_text != repr(default): # This may be an internal name. If so, handle the ones we know about. 
diff --git a/tensorflow/tools/docs/py_guide_parser.py b/tensorflow/tools/docs/py_guide_parser.py index 216353ecee..328f42d18f 100644 --- a/tensorflow/tools/docs/py_guide_parser.py +++ b/tensorflow/tools/docs/py_guide_parser.py @@ -44,7 +44,7 @@ class PyGuideParser(object): def process(self, full_path): """Read and process the file at `full_path`.""" - md_string = open(full_path).read() + md_string = open(full_path, 'rb').read().decode('utf-8') self._lines = md_string.split('\n') seen = set() -- GitLab From cee41f9d10b81ce3b49f566ddd448a7f3f2872c3 Mon Sep 17 00:00:00 2001 From: KB Sriram Date: Wed, 7 Mar 2018 08:11:03 -0800 Subject: [PATCH 0033/1931] C++ gradient for StridedSlice See https://github.com/tensorflow/tensorflow/issues/9645 --- tensorflow/cc/gradients/array_grad.cc | 36 ++++++++++++++++++++++ tensorflow/cc/gradients/array_grad_test.cc | 24 +++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc index 6545e4ee3e..ff348fadb2 100644 --- a/tensorflow/cc/gradients/array_grad.cc +++ b/tensorflow/cc/gradients/array_grad.cc @@ -385,6 +385,42 @@ Status MirrorPadGradGrad(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("MirrorPadGrad", MirrorPadGradGrad); +Status StridedSliceGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + Input x = Shape(scope, op.input(0)); + Input begin = op.input(1); + Input end = op.input(2); + Input strides = op.input(3); + int64 begin_mask; + int64 end_mask; + int64 ellipsis_mask; + int64 new_axis_mask; + int64 shrink_axis_mask; + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "begin_mask", &begin_mask)); + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "end_mask", &end_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "ellipsis_mask", &ellipsis_mask)); + TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "new_axis_mask", &new_axis_mask)); + 
TF_RETURN_IF_ERROR( + GetNodeAttr(op.node()->attrs(), "shrink_axis_mask", &shrink_axis_mask)); + grad_outputs->push_back( + StridedSliceGrad(scope, x, begin, end, strides, grad_inputs[0], + StridedSliceGrad::BeginMask(begin_mask) + .EndMask(end_mask) + .EllipsisMask(ellipsis_mask) + .NewAxisMask(new_axis_mask) + .ShrinkAxisMask(shrink_axis_mask))); + // No gradients returned for begin, end and strides + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc index 4a215fcc92..2a2180297c 100644 --- a/tensorflow/cc/gradients/array_grad_test.cc +++ b/tensorflow/cc/gradients/array_grad_test.cc @@ -354,5 +354,29 @@ TEST_F(ArrayGradTest, MirrorPadGradGrad_Symmetric) { RunTest(x, x_shape, y, y_shape); } +TEST_F(ArrayGradTest, StridedSliceGrad) { + TensorShape x_shape({6, 4, 4}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + + // y = x[2:6:2, 1:3, 1:3] + auto y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}); + // y.shape = [2, 2, 2]; + RunTest(x, x_shape, y, {2, 2, 2}); + + // y = x[2:6:2, 1:3, 1:3] + // begin_mask = 1<<1 (ignore begin_index = 1) + // end_mask = 1<<2 (ignore end_index = 2) + y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}, + StridedSlice::BeginMask(1<<1).EndMask(1<<2)); + // y.shape = [2, 3, 3]; + RunTest(x, x_shape, y, {2, 3, 3}); + + // y = [tf.newaxis, 2:6:2, 1:3, 1:3] + y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1}, + StridedSlice::NewAxisMask(1<<0)); + // y.shape = [1, 2, 2, 2]; + RunTest(x, x_shape, y, {1, 2, 2, 2}); +} + } // namespace } // namespace tensorflow -- GitLab From e31fb25f4e3989a846a8e54d789a3bf5efff0cea Mon Sep 17 00:00:00 2001 From: KB 
Sriram Date: Thu, 8 Mar 2018 07:40:24 -0800 Subject: [PATCH 0034/1931] Clang-format fixes. --- tensorflow/cc/gradients/array_grad_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc index 2a2180297c..de3bd0fc9e 100644 --- a/tensorflow/cc/gradients/array_grad_test.cc +++ b/tensorflow/cc/gradients/array_grad_test.cc @@ -367,13 +367,13 @@ TEST_F(ArrayGradTest, StridedSliceGrad) { // begin_mask = 1<<1 (ignore begin_index = 1) // end_mask = 1<<2 (ignore end_index = 2) y = StridedSlice(scope_, x, {2, 1, 1}, {6, 3, 3}, {2, 1, 1}, - StridedSlice::BeginMask(1<<1).EndMask(1<<2)); + StridedSlice::BeginMask(1 << 1).EndMask(1 << 2)); // y.shape = [2, 3, 3]; RunTest(x, x_shape, y, {2, 3, 3}); // y = [tf.newaxis, 2:6:2, 1:3, 1:3] y = StridedSlice(scope_, x, {0, 2, 1, 1}, {0, 6, 3, 3}, {1, 2, 1, 1}, - StridedSlice::NewAxisMask(1<<0)); + StridedSlice::NewAxisMask(1 << 0)); // y.shape = [1, 2, 2, 2]; RunTest(x, x_shape, y, {1, 2, 2, 2}); } -- GitLab From d6533df7cd3ef19b39081a64fcb0bed5f83c7ee0 Mon Sep 17 00:00:00 2001 From: Giuseppe Date: Thu, 8 Mar 2018 17:49:29 +0100 Subject: [PATCH 0035/1931] Fix markdown error in layers tutorial. --- tensorflow/docs_src/tutorials/layers.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index ee03f440c9..b24d3f4cad 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -192,8 +192,7 @@ dive deeper into the `tf.layers` code used to create each layer, as well as how to calculate loss, configure the training op, and generate predictions. 
If you're already experienced with CNNs and @{$get_started/custom_estimators$TensorFlow `Estimator`s}, and find the above code intuitive, you may want to skim these sections or just -skip ahead to ["Training and Evaluating the CNN MNIST -Classifier"](#training-and-evaluating-the-cnn-mnist-classifier). +skip ahead to ["Training and Evaluating the CNN MNIST Classifier"](#training_and_evaluating_the_cnn_mnist_classifier). ### Input Layer @@ -534,9 +533,8 @@ if mode == tf.estimator.ModeKeys.TRAIN: ``` > Note: For a more in-depth look at configuring training ops for Estimator model -> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining -> the training op for the model"} in the @{$get_started/custom_estimators$"Creating Estimations in -> tf.estimator"} tutorial. +> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining the training op for the model"} +> in the @{$get_started/custom_estimators$"Creating Estimations in tf.estimator"} tutorial. ### Add evaluation metrics -- GitLab From e8cf1fb7dc9dabe1a2a0b181a7b587c1300888a3 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Thu, 8 Mar 2018 14:07:30 -0800 Subject: [PATCH 0036/1931] Use getfullargspec in signature parsing. 
--- tensorflow/python/util/tf_inspect.py | 36 ++++++++++++++++++++++------ tensorflow/tools/docs/parser.py | 34 +++++++++++++------------- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index c4168f7b1a..1fbc33ba0b 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -18,12 +18,22 @@ from __future__ import division from __future__ import print_function import inspect as _inspect +import six +from collections import namedtuple from tensorflow.python.util import tf_decorator ArgSpec = _inspect.ArgSpec +if six.PY3: + FullArgSpec = _inspect.FullArgSpec +else: + FullArgSpec = namedtuple( + 'FullArgSpec', ['args', 'varargs', 'varkw', 'defaults', + 'kwonlyargs', 'kwonlydefaults', 'annotations']) + + def currentframe(): """TFDecorator-aware replacement for inspect.currentframe.""" return _inspect.stack()[1][0] @@ -46,20 +56,32 @@ def getargspec(object): # pylint: disable=redefined-builtin def getfullargspec(obj): # pylint: disable=redefined-builtin - """TFDecorator-aware replacement for inspect.getfullargspec and fallback to - inspect.getargspec in Python 2. + """TFDecorator-aware replacement for inspect.getfullargspec. Args: obj: A callable, possibly decorated. Returns: - The `FullArgSpec` (`ArgSpec` in Python 2) that describes the signature of + The `FullArgSpec` that describes the signature of the outermost decorator that changes the callable's signature. If the - callable is not decorated, `inspect.getfullargspec()` - (`inspect.getargspec()` in Python 2) will be called directly on the - callable. + callable is not decorated, `inspect.getfullargspec()` will be called + directly on the callable. 
""" - spec_fn = getattr(_inspect, 'getfullargspec', getattr(_inspect, 'getargspec')) + if six.PY2: + def spec_fn(target): + argspecs = _inspect.getargspec(target) + fullargspecs = FullArgSpec( + args=argspecs.args, + varargs=argspecs.varargs, + varkw=argspecs.keywords, + defaults=argspecs.defaults, + kwonlyargs=[], + kwonlydefaults={}, + annotations={}) + return fullargspecs + else: + spec_fn = _inspect.getfullargspec + decorators, target = tf_decorator.unwrap(obj) return next((d.decorator_argspec for d in decorators if d.decorator_argspec is not None), spec_fn(target)) diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index dd0351b4c6..16513d0ee1 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -601,20 +601,20 @@ def _parse_md_docstring(py_object, relative_path_to_root, reference_resolver): def _get_arg_spec(func): """Extracts signature information from a function or functools.partial object. - For functions, uses `tf_inspect.getargspec`. For `functools.partial` objects, - corrects the signature of the underlying function to take into account the - removed arguments. + For functions, uses `tf_inspect.getfullargspec`. For `functools.partial` + objects, corrects the signature of the underlying function to take into + account the removed arguments. Args: func: A function whose signature to extract. Returns: - An `ArgSpec` namedtuple `(args, varargs, keywords, defaults)`, as returned - by `tf_inspect.getargspec`. + An `FullArgSpec` namedtuple `(args, varargs, varkw, defaults, etc.)`, + as returned by `tf_inspect.getfullargspec`. """ - # getargspec does not work for functools.partial objects directly. + # getfullargspec does not work for functools.partial objects directly. if isinstance(func, functools.partial): - argspec = tf_inspect.getargspec(func.func) + argspec = tf_inspect.getfullargspec(func.func) # Remove the args from the original function that have been used up. 
first_default_arg = ( len(argspec.args or []) - len(argspec.defaults or [])) @@ -637,12 +637,14 @@ def _get_arg_spec(func): argspec_defaults.pop(i-first_default_arg) else: first_default_arg -= 1 - return tf_inspect.ArgSpec(args=argspec_args, - varargs=argspec.varargs, - keywords=argspec.keywords, - defaults=tuple(argspec_defaults)) + # NOTE Some fields from FullArgSpec were removed here. + # Add them back if needed in the future. + return tf_inspect.FullArgSpec(args=argspec_args, + varargs=argspec.varargs, + varkw=argspec.varkw, + defaults=tuple(argspec_defaults)) else: # Regular function or method, getargspec will work fine. - return tf_inspect.getargspec(func) + return tf_inspect.getfullargspec(func) def _remove_first_line_indent(string): @@ -657,7 +659,7 @@ def _generate_signature(func, reverse_index): """Given a function, returns a list of strings representing its args. This function produces a list of strings representing the arguments to a - python function. It uses tf_inspect.getargspec, which + python function. It uses tf_inspect.getfullargspec, which does not generalize well to Python 3.x, which is more flexible in how *args and **kwargs are handled. This is not a problem in TF, since we have to remain compatible to Python 2.7 anyway. @@ -710,7 +712,7 @@ def _generate_signature(func, reverse_index): elif ast_default is not None: default_text = ( astor.to_source(ast_default).rstrip('\n').replace('\t', '\\t') - .replace('\n', '\\n').replace('"""', "'")) + .replace('\n', '\\n').replace('"""', "'")) default_text = PAREN_NUMBER_RE.sub('\\1', default_text) if default_text != repr(default): @@ -745,8 +747,8 @@ def _generate_signature(func, reverse_index): # Add *args and *kwargs. 
if argspec.varargs: args_list.append('*' + argspec.varargs) - if argspec.keywords: - args_list.append('**' + argspec.keywords) + if argspec.varkw: + args_list.append('**' + argspec.varkw) return args_list -- GitLab From 8cf2a1f0db40174cd6feab96c07e47ba8349d11c Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Thu, 8 Mar 2018 14:18:54 -0800 Subject: [PATCH 0037/1931] fix encoding again --- tensorflow/tools/docs/generate_lib.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index a7ab0fa538..d9e8069a61 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -133,8 +133,12 @@ def write_docs(output_dir, parser_config, yaml_toc, root_title='TensorFlow'): try: if not os.path.exists(directory): os.makedirs(directory) + # This function returns raw bytes in PY2 or unicode in PY3. + text = pretty_docs.build_md_page(page_info) + if six.PY3: + text = text.encode('utf-8') with open(path, 'wb') as f: - f.write(pretty_docs.build_md_page(page_info).encode('utf-8')) + f.write(text) except OSError as e: print('Cannot write documentation for %s to %s: %s' % (full_name, directory, e)) -- GitLab From fe46c22a80b068b2b30f1e44f2f950ba6b6e907b Mon Sep 17 00:00:00 2001 From: Joe Yearsley Date: Fri, 9 Mar 2018 22:41:37 +0000 Subject: [PATCH 0038/1931] Update fold_old_batch_norms.cc Fixes the problem of using fused batch normalization and this transform, only shows up when using 'NCHW' as the default is 'NHWC'. 
--- tensorflow/tools/graph_transforms/fold_old_batch_norms.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index d86f65325b..a5acd53ad6 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -159,6 +159,7 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, NodeDef bias_add_node; bias_add_node.set_op("BiasAdd"); bias_add_node.set_name(conv_output_name); + bias_add_op.attr["data_format"].CopyFrom(conv_node.attr["data_format"]) CopyNodeAttr(conv_node, "T", "T", &bias_add_node); AddNodeInput(conv_node.name(), &bias_add_node); AddNodeInput(bias_offset_node.name(), &bias_add_node); -- GitLab From 1ad788b136d509888cf7d484f762e31b2ee37a50 Mon Sep 17 00:00:00 2001 From: Joe Yearsley Date: Fri, 9 Mar 2018 22:46:30 +0000 Subject: [PATCH 0039/1931] Update fold_old_batch_norms.cc --- tensorflow/tools/graph_transforms/fold_old_batch_norms.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index a5acd53ad6..3376a81312 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -159,7 +159,7 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, NodeDef bias_add_node; bias_add_node.set_op("BiasAdd"); bias_add_node.set_name(conv_output_name); - bias_add_op.attr["data_format"].CopyFrom(conv_node.attr["data_format"]) + bias_add_node.attr["data_format"].CopyFrom(conv_node.attr["data_format"]) CopyNodeAttr(conv_node, "T", "T", &bias_add_node); AddNodeInput(conv_node.name(), &bias_add_node); AddNodeInput(bias_offset_node.name(), &bias_add_node); -- GitLab From d0680917907671f5870818d21ee0ff77bf7c3ff6 Mon Sep 17 00:00:00 2001 From: Joe 
Yearsley Date: Fri, 9 Mar 2018 23:56:52 +0000 Subject: [PATCH 0040/1931] Update fold_old_batch_norms.cc --- tensorflow/tools/graph_transforms/fold_old_batch_norms.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index 3376a81312..59f3ffdcda 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -159,7 +159,7 @@ Status FuseScaleOffsetToConvWeights(const std::vector& scale_values, NodeDef bias_add_node; bias_add_node.set_op("BiasAdd"); bias_add_node.set_name(conv_output_name); - bias_add_node.attr["data_format"].CopyFrom(conv_node.attr["data_format"]) + CopyNodeAttr(conv_node, "data_format", "data_format", &bias_add_node); CopyNodeAttr(conv_node, "T", "T", &bias_add_node); AddNodeInput(conv_node.name(), &bias_add_node); AddNodeInput(bias_offset_node.name(), &bias_add_node); -- GitLab From b4db970c338123ee3156bb0e216193bde35d4b17 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Tue, 13 Mar 2018 00:04:33 +0800 Subject: [PATCH 0041/1931] fix broken link of tensor-like type --- tensorflow/docs_src/programmers_guide/graphs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index f28660d44a..81fd99cb4a 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -362,7 +362,7 @@ operations that are needed to compute the result. @{tf.Session.run} requires you to specify a list of **fetches**, which determine the return values, and may be a @{tf.Operation}, a @{tf.Tensor}, or -a [tensor-like type](#tensor-like-objects) such as @{tf.Variable}. These fetches +a [tensor-like type](#tensor-like_objects) such as @{tf.Variable}. 
These fetches determine what **subgraph** of the overall @{tf.Graph} must be executed to produce the result: this is the subgraph that contains all operations named in the fetch list, plus all operations whose outputs are used to compute the value -- GitLab From 66b38c5e7af4b607f393973d18aaabb6e00f9723 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 12 Mar 2018 12:56:59 -0700 Subject: [PATCH 0042/1931] Block docs for str, repr, hash. No python2 code is generating useful docs for these, and in python3 many useless docs are generated, so I've blocked them. --- tensorflow/tools/docs/parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 5f2a411bae..95155b1149 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -1127,7 +1127,8 @@ class _ClassPageInfo(object): # Remove builtin members that we never want to document. if short_name in ['__class__', '__base__', '__weakref__', '__doc__', '__module__', '__dict__', '__abstractmethods__', - '__slots__', '__getnewargs__']: + '__slots__', '__getnewargs__', '__str__', + '__repr__', '__hash__']: continue child_name = '.'.join([self.full_name, short_name]) @@ -1172,7 +1173,7 @@ class _ClassPageInfo(object): # obvious what they do, don't include them in the docs if there's no # docstring. if not child_doc.brief.strip() and short_name in [ - '__str__', '__repr__', '__hash__', '__del__', '__copy__']: + '__del__', '__copy__']: print('Skipping %s, defined in %s, no docstring.' 
% (child_name, defining_class)) continue -- GitLab From 1f03b013ef00c128cf8331f274524a23d86ac458 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Tue, 13 Mar 2018 16:44:57 +0800 Subject: [PATCH 0043/1931] revert wrong typo fix --- tensorflow/docs_src/programmers_guide/graphs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index 81fd99cb4a..69eb6df5f6 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -508,7 +508,7 @@ is sufficient. However, TensorFlow also provides methods for manipulating the default graph, which can be useful in more advanced use cases. For example: * A @{tf.Graph} defines the namespace for @{tf.Operation} objects: each - operation in a single graph must have an unique name. TensorFlow will + operation in a single graph must have a unique name. TensorFlow will "uniquify" the names of operations by appending `"_1"`, `"_2"`, and so on to their names if the requested name is already taken. Using multiple explicitly created graphs gives you more control over what name is given to each -- GitLab From d751b6bfa84dae1be9835fc40cc3094a8205a74e Mon Sep 17 00:00:00 2001 From: imsheridan Date: Tue, 13 Mar 2018 23:11:47 +0800 Subject: [PATCH 0044/1931] Fix link of typical distributed configuration --- tensorflow/docs_src/programmers_guide/graphs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index 69eb6df5f6..e4095cf7dd 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -210,7 +210,7 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. 
result = tf.matmul(weights, img) ``` -If you are deploying TensorFlow in a typical @{$deploy/distributed} configuration, +If you are deploying TensorFlow in a @{$distributed$typical distributed configuration}, you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): -- GitLab From b618740a8754e85a2a6ee142028105f76a4d5d58 Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Fri, 16 Mar 2018 00:11:38 +0900 Subject: [PATCH 0045/1931] implement matrix 2-norm --- tensorflow/python/ops/linalg_ops.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 37470e00d7..110b766a6e 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -454,7 +454,7 @@ def norm(tensor, This function can compute several different vector norms (the 1-norm, the Euclidean or 2-norm, the inf-norm, and in general the p-norm for p > 0) and - matrix norms (Frobenius, 1-norm, and inf-norm). + matrix norms (Frobenius, 1-norm, 2-norm and inf-norm). Args: tensor: `Tensor` of types `float32`, `float64`, `complex64`, `complex128` @@ -465,7 +465,7 @@ def norm(tensor, Some restrictions apply: a) The Frobenius norm `fro` is not defined for vectors, b) If axis is a 2-tuple (matrix norm), only 'euclidean', 'fro', `1`, - `np.inf` are supported. + `2`, `np.inf` are supported. See the description of `axis` on how to compute norms for a batch of vectors or matrices stored in a tensor. axis: If `axis` is `None` (the default), the input is considered a vector @@ -521,8 +521,7 @@ def norm(tensor, axis[0] == axis[1]): raise ValueError( "'axis' must be None, an integer, or a tuple of 2 unique integers") - # TODO(rmlarsen): Implement matrix 2-norm using tf.svd(). 
- supported_matrix_norms = ['euclidean', 'fro', 1, np.inf] + supported_matrix_norms = ['euclidean', 'fro', 1, 2, np.inf] if ord not in supported_matrix_norms: raise ValueError("'ord' must be a supported matrix norm in %s, got %s" % (supported_matrix_norms, ord)) @@ -539,10 +538,20 @@ def norm(tensor, with ops.name_scope(name, 'norm', [tensor]): tensor = ops.convert_to_tensor(tensor) + rank = len(tensor.get_shape().as_list()) + axis = tuple(map(lambda i: i if i >= 0 else i + rank, axis)) + if ord in ['fro', 'euclidean', 2, 2.0]: - # TODO(rmlarsen): Move 2-norm to a separate clause once we support it for - # matrices. - result = math_ops.sqrt( + if is_matrix_norm and ord in [2, 2.0]: + axes = list(range(rank)) + perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis) + perm_after = list(map(lambda i: perm_before.index(i), axes)) + result = array_ops.transpose(array_ops.expand_dims(math_ops.reduce_max( + gen_linalg_ops.svd(array_ops.transpose(tensor, perm=perm_before), + compute_uv=False)[0], axis=-1, keepdims=True), + axis=-1), perm=perm_after) + else: + result = math_ops.sqrt( math_ops.reduce_sum( tensor * math_ops.conj(tensor), axis, keepdims=True)) else: -- GitLab From a280a1d0cfd64831857826db639a3ee0180094de Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Fri, 16 Mar 2018 00:32:34 +0900 Subject: [PATCH 0046/1931] follow python coding style --- tensorflow/python/ops/linalg_ops.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 110b766a6e..b467711e3b 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -546,14 +546,15 @@ def norm(tensor, axes = list(range(rank)) perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis) perm_after = list(map(lambda i: perm_before.index(i), axes)) - result = array_ops.transpose(array_ops.expand_dims(math_ops.reduce_max( - 
gen_linalg_ops.svd(array_ops.transpose(tensor, perm=perm_before), - compute_uv=False)[0], axis=-1, keepdims=True), - axis=-1), perm=perm_after) + result = array_ops.transpose(array_ops.expand_dims( + math_ops.reduce_max(gen_linalg_ops.svd( + array_ops.transpose(tensor, perm=perm_before), + compute_uv=False)[0], axis=-1, keepdims=True), axis=-1), + perm=perm_after) else: result = math_ops.sqrt( - math_ops.reduce_sum( - tensor * math_ops.conj(tensor), axis, keepdims=True)) + math_ops.reduce_sum( + tensor * math_ops.conj(tensor), axis, keepdims=True)) else: result = math_ops.abs(tensor) if ord == 1: -- GitLab From cc10ac9b7d593375a7cee0c167c20989dc29e8cf Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Fri, 16 Mar 2018 00:40:05 +0900 Subject: [PATCH 0047/1931] remove unnecessary lambda --- tensorflow/python/ops/linalg_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index b467711e3b..db6ce71125 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -545,7 +545,7 @@ def norm(tensor, if is_matrix_norm and ord in [2, 2.0]: axes = list(range(rank)) perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis) - perm_after = list(map(lambda i: perm_before.index(i), axes)) + perm_after = list(map(perm_before.index, axes)) result = array_ops.transpose(array_ops.expand_dims( math_ops.reduce_max(gen_linalg_ops.svd( array_ops.transpose(tensor, perm=perm_before), -- GitLab From b21ceeb518ca9462a247d8be05870f12bebad201 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 15 Mar 2018 23:13:25 -0700 Subject: [PATCH 0048/1931] Enhancement with deprecated_argument_lookup for argmax This fix makes some enhancement for argmax, using deprecated_argument_lookup instead of customized logic. 
Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index e18d0e9501..9a88b71398 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -208,11 +208,9 @@ def argmax(input, name=None, dimension=None, output_type=dtypes.int64): - if dimension is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dimension'") - axis = dimension - elif axis is None: + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "dimension", dimension) + if axis is None: axis = 0 return gen_math_ops.arg_max(input, axis, name=name, output_type=output_type) -- GitLab From 82571ca199869f60fe2036d15d0071031d997b47 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 15 Mar 2018 23:15:37 -0700 Subject: [PATCH 0049/1931] Enhancement with deprecated_argument_lookup for argmin This fix makes some enhancement for argmin, using deprecated_argument_lookup instead of customized logic. 
Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 9a88b71398..a2892d206d 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -226,11 +226,9 @@ def argmin(input, name=None, dimension=None, output_type=dtypes.int64): - if dimension is not None: - if axis is not None: - raise ValueError("Cannot specify both 'axis' and 'dimension'") - axis = dimension - elif axis is None: + axis = deprecation.deprecated_argument_lookup( + "axis", axis, "dimension", dimension) + if axis is None: axis = 0 return gen_math_ops.arg_min(input, axis, name=name, output_type=output_type) -- GitLab From 52fef7f6b8b41d4fffa92bddcb78d96eb6333051 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Fri, 16 Mar 2018 16:03:26 +0900 Subject: [PATCH 0050/1931] fix typo --- tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index 272410c693..7651a03fe5 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -398,7 +398,7 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNorms) { } TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) { - // Test axis is not 3, so all weigths and offsets are fused to each of inputs + // Test axis is not 3, so all weights and offsets are fused to each of inputs // of conv2d. 
TestFoldFusedBatchNormsWithConcat(/*split=*/true); // Test axis = 3, BatchNorm weights and offsets will be split before fused -- GitLab From 20424e92417b520d7ea8c7323eee46538d2b909f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 17 Mar 2018 09:30:24 +0800 Subject: [PATCH 0051/1931] CLN: remove the unused import: tf_export --- tensorflow/contrib/opt/python/training/adamax.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/opt/python/training/adamax.py b/tensorflow/contrib/opt/python/training/adamax.py index fe5522a170..65918831e9 100644 --- a/tensorflow/contrib/opt/python/training/adamax.py +++ b/tensorflow/contrib/opt/python/training/adamax.py @@ -26,7 +26,6 @@ from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.training import adam from tensorflow.python.training import training_ops -from tensorflow.python.util.tf_export import tf_export class AdaMaxOptimizer(adam.AdamOptimizer): -- GitLab From b5ebb7e9e5f5ae59e6db93bb5950f4bb68bf9e18 Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Sun, 18 Mar 2018 00:48:46 +0900 Subject: [PATCH 0052/1931] update norm_op_test --- tensorflow/python/kernel_tests/norm_op_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/norm_op_test.py b/tensorflow/python/kernel_tests/norm_op_test.py index d85512fae6..d6625b69ef 100644 --- a/tensorflow/python/kernel_tests/norm_op_test.py +++ b/tensorflow/python/kernel_tests/norm_op_test.py @@ -85,8 +85,6 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_): if ((not is_matrix_norm and ord_ == "fro") or (is_matrix_norm and is_fancy_p_norm)): self.skipTest("Not supported by neither numpy.linalg.norm nor tf.norm") - if is_matrix_norm and ord_ == 2: - self.skipTest("Not supported by tf.norm") if ord_ == 'euclidean' or (axis_ is None and len(shape) > 2): self.skipTest("Not supported by 
numpy.linalg.norm") matrix = np.random.randn(*shape_).astype(dtype_) -- GitLab From c53160a2a5decdae30bda6e8f40b45f3b4dd9f8e Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Sun, 18 Mar 2018 00:49:13 +0900 Subject: [PATCH 0053/1931] use tf function instead of np --- tensorflow/python/ops/linalg_ops.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index db6ce71125..d8150d85b9 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import math_ops # pylint: disable=wildcard-import @@ -538,19 +539,27 @@ def norm(tensor, with ops.name_scope(name, 'norm', [tensor]): tensor = ops.convert_to_tensor(tensor) - rank = len(tensor.get_shape().as_list()) - axis = tuple(map(lambda i: i if i >= 0 else i + rank, axis)) if ord in ['fro', 'euclidean', 2, 2.0]: if is_matrix_norm and ord in [2, 2.0]: - axes = list(range(rank)) - perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis) - perm_after = list(map(perm_before.index, axes)) - result = array_ops.transpose(array_ops.expand_dims( - math_ops.reduce_max(gen_linalg_ops.svd( - array_ops.transpose(tensor, perm=perm_before), - compute_uv=False)[0], axis=-1, keepdims=True), axis=-1), - perm=perm_after) + rank = array_ops.rank(tensor) + axis = functional_ops.map_fn( + lambda i: control_flow_ops.cond(i >= 0, lambda: i, + lambda: i + rank), + ops.convert_to_tensor(axis)).eval() + axes = math_ops.range(rank) + perm_before = array_ops.concat( + [array_ops.setdiff1d(axes, axis)[0], axis], axis=0) + perm_after = functional_ops.map_fn( + lambda i: 
math_ops.cast( + array_ops.squeeze( + array_ops.where(math_ops.equal(perm_before, i))), + dtype=dtypes.int32), axes) + permed = array_ops.transpose(tensor, perm=perm_before) + matrix_2_norm = array_ops.expand_dims( + math_ops.reduce_max(gen_linalg_ops.svd(permed, compute_uv=False)[0], + axis=-1, keepdims=True), axis=-1) + result = array_ops.transpose(matrix_2_norm, perm=perm_after) else: result = math_ops.sqrt( math_ops.reduce_sum( -- GitLab From fda633fb7187da8522ef79555d1267996fa983bc Mon Sep 17 00:00:00 2001 From: Wenhao Hu Date: Sun, 18 Mar 2018 21:29:16 +0900 Subject: [PATCH 0054/1931] remove test code --- tensorflow/python/ops/linalg_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index d8150d85b9..608b72c574 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -546,7 +546,7 @@ def norm(tensor, axis = functional_ops.map_fn( lambda i: control_flow_ops.cond(i >= 0, lambda: i, lambda: i + rank), - ops.convert_to_tensor(axis)).eval() + ops.convert_to_tensor(axis)) axes = math_ops.range(rank) perm_before = array_ops.concat( [array_ops.setdiff1d(axes, axis)[0], axis], axis=0) -- GitLab From a34a3b2035ca0cfd48488c03bd4b088070bf9a25 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Thu, 22 Mar 2018 14:32:12 -0700 Subject: [PATCH 0055/1931] Fixing the issue where MKL-DNN is getting built when not using --config=mkl --- tensorflow/tensorflow.bzl | 53 +++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 9b0db8a112..8549c34691 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -788,7 +788,33 @@ def tf_cc_test_mkl(srcs, tags=[], size="medium", args=None): - if_mkl(tf_cc_tests(srcs, deps, name, linkstatic=linkstatic, tags=tags, size=size, args=args, nocopts="-fno-exceptions")) + for src in srcs: + 
native.cc_test( + name=src_to_test_name(src), + srcs=if_mkl([src]) + tf_binary_additional_srcs(), + copts=tf_copts(), + linkopts=select({ + clean_dep("//tensorflow:android"): [ + "-pie", + ], + clean_dep("//tensorflow:windows"): [], + clean_dep("//tensorflow:windows_msvc"): [], + "//conditions:default": [ + "-lpthread", + "-lm" + ], + }) + _rpath_linkopts(src_to_test_name(src)), + deps=deps + if_mkl( + [ + "//third_party/mkl:intel_binary_blob", + ], + ), + linkstatic=linkstatic, + tags=tags, + size=size, + args=args, + nocopts="-fno-exceptions") + def tf_cc_tests_gpu(srcs, deps, @@ -1006,16 +1032,12 @@ register_extension_info( def tf_mkl_kernel_library(name, prefix=None, srcs=None, - gpu_srcs=None, hdrs=None, deps=None, alwayslink=1, copts=tf_copts(), - nocopts="-fno-exceptions", - **kwargs): + nocopts="-fno-exceptions"): """A rule to build MKL-based TensorFlow kernel libraries.""" - gpu_srcs = gpu_srcs # unused argument - kwargs = kwargs # unused argument if not bool(srcs): srcs = [] @@ -1028,16 +1050,15 @@ def tf_mkl_kernel_library(name, hdrs = hdrs + native.glob( [prefix + "*.h"]) - if_mkl( - native.cc_library( - name=name, - srcs=srcs, - hdrs=hdrs, - deps=deps, - alwayslink=alwayslink, - copts=copts, - nocopts=nocopts - )) + native.cc_library( + name=name, + srcs=if_mkl(srcs), + hdrs=hdrs, + deps=deps, + alwayslink=alwayslink, + copts=copts, + nocopts=nocopts + ) register_extension_info( extension_name = "tf_mkl_kernel_library", -- GitLab From 083cf6b91a380641933457a4301f9b1efa13af92 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Oct 2017 17:03:15 +0000 Subject: [PATCH 0056/1931] Add customerized kernel implementation for clip_by_value This fix tries to address the issue raised in 7225 where `tf.clip_by_value` does not have a custom kernel and reused `tf.maximum` and `tf.mimimum`. In case scalar values are passed to `tf.clip_by_value`, unnecessary memory might incur. This fix adds the customerized kernel implementation for `tf.clip_by_value`. 
This fix fixes 7225. Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_op_clip.cc | 150 +++++++++++++++++++++++ tensorflow/core/ops/math_ops.cc | 23 ++++ 2 files changed, 173 insertions(+) create mode 100644 tensorflow/core/kernels/cwise_op_clip.cc diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc new file mode 100644 index 0000000000..6ce062b08f --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -0,0 +1,150 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/kernels/cwise_ops_common.h" + +//#include "third_party/eigen3/Eigen/Core/CwiseTernaryOp.h" + +namespace tensorflow { + +// Unary functor for clip +template +struct UnaryClipOp { + UnaryClipOp(const T& value_min, const T& value_max) + : value_min_(value_min), value_max_(value_max) {} + const T operator()(const T& value) const { + return std::max(std::min(value, value_max_), value_min_); + } + T value_min_; + T value_max_; +}; + +// Binary functor for clip +template +struct BinaryClipMinOp { + BinaryClipMinOp(const T& value_min) : value_min_(value_min) {} + const T operator()(const T& value, const T& value_max) const { + return std::max(std::min(value, value_max), value_min_); + } + T value_min_; +}; + +// Binary functor for clip +template +struct BinaryClipMaxOp { + BinaryClipMaxOp(const T& value_max) : value_max_(value_max) {} + const T operator()(const T& value, const T& value_min) const { + return std::max(std::min(value, value_max_), value_min); + } + T value_max_; +}; + +// Basic coefficient-wise tenary operations. +// This is the case for example of the clip_by_value. +// Device: E.g., CPUDevice, GPUDevice. +// Functor: defined above. E.g., functor::clip. 
+template +class TenaryOp : public OpKernel { + public: + explicit TenaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& in0 = ctx->input(0); + const Tensor& in1 = ctx->input(1); + const Tensor& in2 = ctx->input(2); + + auto in0_flat = in0.flat(); + auto in1_flat = in1.flat(); + auto in2_flat = in2.flat(); + const Device& d = ctx->eigen_device(); + + Tensor* out = nullptr; + OP_REQUIRES_OK( + ctx, ctx->forward_input_or_allocate_output({0}, 0, in0.shape(), &out)); + auto out_flat = out->flat(); + if (in1.shape() == in2.shape()) { + if (in0.shape() == in1.shape()) { + out_flat = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat); + } else { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. ", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); + out_flat = in0_flat.unaryExpr(UnaryClipOp(in1_flat(0), in2_flat(0))); + } + } else { + if (in0.shape() == in1.shape()) { + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in2.shape()), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. ", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); + out_flat = + in0_flat.binaryExpr(in1_flat, BinaryClipMaxOp(in2_flat(0))); + + } else { + OP_REQUIRES(ctx, (in0.shape() == in2.shape() && + TensorShapeUtils::IsScalar(in1.shape())), + errors::InvalidArgument( + "clip_value_min and clip_value_max must be either of " + "the same shape as input, or a scalar. 
", + "input shape: ", in0.shape().DebugString(), + "clip_value_min shape: ", in1.shape().DebugString(), + "clip_value_max shape: ", in2.shape().DebugString())); + out_flat = + in0_flat.binaryExpr(in2_flat, BinaryClipMinOp(in1_flat(0))); + } + } + } +}; + +#define REGISTER_CPU_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("ClipByValue").Device(DEVICE_CPU).TypeConstraint("T"), \ + TenaryOp); + +REGISTER_CPU_KERNEL(Eigen::half); +REGISTER_CPU_KERNEL(float); +REGISTER_CPU_KERNEL(double); +REGISTER_CPU_KERNEL(int8); +REGISTER_CPU_KERNEL(int16); +REGISTER_CPU_KERNEL(int32); +REGISTER_CPU_KERNEL(int64); +REGISTER_CPU_KERNEL(uint8); +REGISTER_CPU_KERNEL(uint16); + +#undef REGISTER_CPU_KERNEL + +#if GOOGLE_CUDA +// REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double); + +// A special GPU kernel for int32. +// TODO(b/25387198): Also enable int32 in device memory. This kernel +// registration requires all int32 inputs and outputs to be in host memory. +REGISTER_KERNEL_BUILDER(Name("ClipByValue") + .Device(DEVICE_GPU) + .HostMemory("t") + .HostMemory("clip_value_min") + .HostMemory("clip_value_min") + .TypeConstraint("T"), + TenaryOp); +#endif + +} // namespace tensorflow diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 8f33d51d5a..602a6ec115 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1558,6 +1558,29 @@ REGISTER_OP("Bucketize") .Attr("boundaries: list(float)") .SetShapeFn(shape_inference::UnchangedShape); +REGISTER_OP("ClipByValue") + .Input("t: T") + .Input("clip_value_min: T") + .Input("clip_value_max: T") + .Output("output: T") + .Attr("T: numbertype") + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +Clips tensor values to a specified min and max. + +Given a tensor `t`, this operation returns a tensor of the same type and +shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`. 
+Any values less than `clip_value_min` are set to `clip_value_min`. Any values +greater than `clip_value_max` are set to `clip_value_max`. + +t: A `Tensor`. +clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape + as `t`. The minimum value to clip by. +clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape + as `t`. The maximum value to clip by. +output: A clipped `Tensor` with the same shape as input 't'. +)doc"); + #ifdef INTEL_MKL REGISTER_OP("_MklAddN") .Input("inputs: N * T") -- GitLab From daf0b206b5afde875a19270136ad22d9d2bb138c Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Oct 2017 17:08:32 +0000 Subject: [PATCH 0057/1931] Add python wrapper for tf.clip_by_value Signed-off-by: Yong Tang --- tensorflow/python/ops/clip_ops.py | 17 +- tensorflow/python/ops/hidden_ops.txt | 395 +++++++++++++++++++++++++++ 2 files changed, 400 insertions(+), 12 deletions(-) create mode 100644 tensorflow/python/ops/hidden_ops.txt diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py index 49f8c66531..a5baebb3f6 100644 --- a/tensorflow/python/ops/clip_ops.py +++ b/tensorflow/python/ops/clip_ops.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.util.tf_export import tf_export @@ -58,18 +59,10 @@ def clip_by_value(t, clip_value_min, clip_value_max, """ with ops.name_scope(name, "clip_by_value", [t, clip_value_min, clip_value_max]) as name: - t = ops.convert_to_tensor(t, name="t") - - # Go through list of tensors, for each value in each tensor clip - t_min = math_ops.minimum(t, clip_value_max) - # Assert that the shape is compatible with the initial shape, - # to prevent unintentional 
broadcasting. - _ = t.shape.merge_with(t_min.shape) - - t_max = math_ops.maximum(t_min, clip_value_min, name=name) - _ = t.shape.merge_with(t_max.shape) - - return t_max + return gen_math_ops._clip_by_value(t, + clip_value_min, + clip_value_max, + name=name) @tf_export("clip_by_norm") diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt new file mode 100644 index 0000000000..e1217e984c --- /dev/null +++ b/tensorflow/python/ops/hidden_ops.txt @@ -0,0 +1,395 @@ +# array_ops +BatchToSpace +BroadcastArgs +BroadcastGradientArgs +ConcatOffset +Concat +ConcatV2 +ConjugateTranspose +Const +DebugGradientIdentity +DebugGradientRefIdentity +EditDistance +ExpandDims +ListDiff +MirrorPad +MirrorPadGrad +OneHot +Pack +Pad +PadV2 +ParallelConcat +Placeholder +RefIdentity +Reverse +Snapshot +SpaceToBatch +Split +SplitV +Squeeze +Slice +TileGrad # Exported through array_grad instead of array_ops. +ZerosLike # TODO(josh11b): Use this instead of the Python version. +Unique +UniqueV2 +UniqueWithCounts +UniqueWithCountsV2 +Unpack + +# candidate_sampling_ops +AllCandidateSampler +ComputeAccidentalHits +FixedUnigramCandidateSampler +LearnedUnigramCandidateSampler +LogUniformCandidateSampler +ThreadUnsafeUnigramCandidateSampler +UniformCandidateSampler + +# checkpoint_ops +GenerateVocabRemapping +LoadAndRemapMatrix + + +# control_flow_ops +Switch +Merge +RefMerge +Exit +RefExit + +# ctc_ops +CTCLoss +CTCGreedyDecoder +CTCBeamSearchDecoder + +# data_flow_ops +Barrier +BarrierClose +BarrierIncompleteSize +BarrierInsertMany +BarrierReadySize +BarrierTakeMany +DeleteSessionTensor +FakeQueue +FIFOQueue +FIFOQueueV2 +GetSessionHandle +GetSessionHandleV2 +GetSessionTensor +HashTable +HashTableV2 +InitializeTable +InitializeTableV2 +InitializeTableFromTextFile +InitializeTableFromTextFileV2 +LookupTableExport +LookupTableExportV2 +LookupTableFind +LookupTableFindV2 +LookupTableImport +LookupTableImportV2 +LookupTableInsert +LookupTableInsertV2 +LookupTableSize 
+LookupTableSizeV2 +MutableDenseHashTable +MutableDenseHashTableV2 +MutableHashTable +MutableHashTableV2 +MutableHashTableOfTensors +MutableHashTableOfTensorsV2 +Mutex +MutexAcquire +MutexRelease +PaddingFIFOQueue +PaddingFIFOQueueV2 +PriorityQueue +PriorityQueueV2 +QueueClose +QueueCloseV2 +QueueDequeue +QueueDequeueV2 +QueueDequeueMany +QueueDequeueManyV2 +QueueDequeueUpTo +QueueDequeueUpToV2 +QueueEnqueue +QueueEnqueueV2 +QueueEnqueueMany +QueueEnqueueManyV2 +QueueSize +QueueSizeV2 +RandomShuffleQueue +RandomShuffleQueueV2 +Stack +StackClose +StackPop +StackPush +StackV2 +StackCloseV2 +StackPopV2 +StackPushV2 +TensorArray +TensorArrayClose +TensorArrayCloseV2 +TensorArrayConcat +TensorArrayConcatV2 +TensorArrayGather +TensorArrayGatherV2 +TensorArrayGrad +TensorArrayGradV2 +TensorArrayPack +TensorArrayPackV2 +TensorArrayRead +TensorArrayReadV2 +TensorArrayScatter +TensorArrayScatterV2 +TensorArraySize +TensorArraySizeV2 +TensorArraySplit +TensorArraySplitV2 +TensorArrayUnpack +TensorArrayUnpackV2 +TensorArrayV2 +TensorArrayWrite +TensorArrayWriteV2 +TensorArrayV3 +TensorArrayCloseV3 +TensorArrayConcatV3 +TensorArrayGatherV3 +TensorArrayGradV3 +TensorArrayReadV3 +TensorArrayPackV3 +TensorArrayScatterV3 +TensorArraySizeV3 +TensorArraySplitV3 +TensorArrayUnpackV3 +TensorArrayWriteV3 + +# functional_ops +SymbolicGradient + +# image_ops +AdjustContrastv2 +NonMaxSuppression +NonMaxSuppressionV2 +RandomCrop +ResizeBilinearGrad +ResizeBicubicGrad +ResizeNearestNeighborGrad +SampleDistortedBoundingBox +SampleDistortedBoundingBoxV2 +ScaleImageGrad + +# io_ops +FixedLengthRecordReader +IdentityReader +ReaderNumRecordsProduced +ReaderNumWorkUnitsCompleted +ReaderRead +ReaderReadUpTo +ReaderReset +ReaderRestoreState +ReaderSerializeState +ReaderWorkQueueLength +FixedLengthRecordReaderV2 +IdentityReaderV2 +ReaderNumRecordsProducedV2 +ReaderNumWorkUnitsCompletedV2 +ReaderReadV2 +ReaderReadUpToV2 +ReaderResetV2 +ReaderRestoreStateV2 +ReaderSerializeStateV2 
+ReaderWorkQueueLengthV2 +Restore +RestoreSlice +Save +SaveSlices +ShardedFilename +ShardedFilespec +TextLineReader +TFRecordReader +WholeFileReader +TextLineReaderV2 +TFRecordReaderV2 +WholeFileReaderV2 +LMDBReader +DecodeCSV + +# linalg_ops +BatchCholesky +BatchCholeskyGrad +BatchMatrixDeterminant +BatchMatrixInverse +BatchMatrixSolve +BatchMatrixSolveLs +BatchMatrixTriangularSolve +BatchSelfAdjointEig +BatchSelfAdjointEigV2 +BatchSvd +LogMatrixDeterminant +MatrixExponential +MatrixLogarithm +MatrixSolveLs +SelfAdjointEig +SelfAdjointEigV2 +Svd + +# logging_ops +Assert +AudioSummary +AudioSummaryV2 +HistogramSummary +ImageSummary +MergeSummary +Print +ScalarSummary +TensorSummary +TensorSummaryV2 + +# math_ops +Abs +AccumulateNV2 +AddN +AddV2 +All +Any +BatchMatMul +BatchFFT +BatchFFT2D +BatchFFT3D +BatchIFFT +BatchIFFT2D +BatchIFFT3D +Bucketize +ClipByValue +Complex +ComplexAbs +Conj +FloorDiv +FloorMod +HistogramFixedWidth +Max +Mean +Min +Mul +Neg +Pow +Prod +Range +RealDiv +Select +SparseMatMul +Sub +Sum +MatMul +Sigmoid +Tanh +SigmoidGrad +TanhGrad +InvGrad +ReciprocalGrad +SqrtGrad +RsqrtGrad +TruncateDiv +TruncateMod + +# nn_ops +AvgPoolGrad # "*Grad" accessible through nn_grad instead of nn_ops. 
+AvgPool3DGrad +BatchNormWithGlobalNormalization +BatchNormWithGlobalNormalizationGrad +FusedBatchNorm +FusedBatchNormV2 +SoftmaxCrossEntropyWithLogits +SparseSoftmaxCrossEntropyWithLogits +LRNGrad +MaxPoolGrad +MaxPoolGradWithArgmax +MaxPoolGradGrad +MaxPoolGradGradWithArgmax +MaxPool3DGrad +MaxPool3DGradGrad +ReluGrad +Relu6Grad +EluGrad +SeluGrad +SoftplusGrad +SoftsignGrad +TopK +TopKV2 +BiasAdd +BiasAddV1 +Relu6 +AvgPool +MaxPool +MaxPoolV2 +Softmax +LogSoftmax +FractionalAvgPoolGrad +FractionalMaxPoolGrad +InTopK +InTopKV2 + +# parsing_ops +ParseExample +ParseSingleSequenceExample + +# random_ops +RandomGamma +RandomPoisson +RandomUniform +RandomUniformInt +RandomShuffle +RandomStandardNormal +ParameterizedTruncatedNormal +TruncatedNormal + +# script_ops +PyFunc +PyFuncStateless +EagerPyFunc + +# sdca_ops + +# state_ops +Variable +VariableV2 +TemporaryVariable +DestroyTemporaryVariable + +# sparse_ops +AddSparseToTensorsMap +AddManySparseToTensorsMap +TakeManySparseFromTensorsMap +DeserializeManySparse +DeserializeSparse +SerializeManySparse +SerializeSparse +SparseAdd +SparseAddGrad +SparseConcat +SparseCross +SparseFillEmptyRows +SparseFillEmptyRowsGrad +SparseSplit +SparseSelectLastK +SparseReorder +SparseReshape +SparseToDense +SparseTensorDenseAdd +SparseTensorDenseMatMul + +# string_ops +StringSplit + +# user_ops +Fact + +# training_ops +# (None) + +# word2vec deprecated ops +NegTrain +Skipgram -- GitLab From 90a271e7a37574fc1c90fd6042c3b3972645d114 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Oct 2017 17:09:05 +0000 Subject: [PATCH 0058/1931] Update tests for `tf.clip_by_value` Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/clip_ops_test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index 5c8b71da17..d47930350e 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ 
b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import clip_ops from tensorflow.python.platform import test @@ -42,10 +43,12 @@ class ClipTest(test.TestCase): x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1]) # Use a nonsensical shape. clip = constant_op.constant([1.0, 2.0]) - with self.assertRaises(ValueError): - _ = clip_ops.clip_by_value(x, -clip, clip) - with self.assertRaises(ValueError): - _ = clip_ops.clip_by_value(x, 1.0, clip) + with self.assertRaises(errors_impl.InvalidArgumentError): + ans = clip_ops.clip_by_value(x, -clip, clip) + tf_ans = ans.eval() + with self.assertRaises(errors_impl.InvalidArgumentError): + ans = clip_ops.clip_by_value(x, 1.0, clip) + tf_ans = ans.eval() def testClipByValueNonFinite(self): with self.test_session(): -- GitLab From cff8abcb1a9305491637dc44559316aa1d8184e6 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 26 Oct 2017 04:37:55 +0000 Subject: [PATCH 0059/1931] Add GPU kernel for tf.clip_by_value Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_op_clip.cc | 162 +++++++++++++----- tensorflow/core/kernels/cwise_op_clip.h | 61 +++++++ .../core/kernels/cwise_op_clip_gpu.cu.cc | 134 +++++++++++++++ 3 files changed, 313 insertions(+), 44 deletions(-) create mode 100644 tensorflow/core/kernels/cwise_op_clip.h create mode 100644 tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index 6ce062b08f..c2980acdd8 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -13,43 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/kernels/cwise_ops_common.h" - -//#include "third_party/eigen3/Eigen/Core/CwiseTernaryOp.h" +#include "tensorflow/core/kernels/cwise_op_clip.h" namespace tensorflow { -// Unary functor for clip -template -struct UnaryClipOp { - UnaryClipOp(const T& value_min, const T& value_max) - : value_min_(value_min), value_max_(value_max) {} - const T operator()(const T& value) const { - return std::max(std::min(value, value_max_), value_min_); - } - T value_min_; - T value_max_; -}; - -// Binary functor for clip -template -struct BinaryClipMinOp { - BinaryClipMinOp(const T& value_min) : value_min_(value_min) {} - const T operator()(const T& value, const T& value_max) const { - return std::max(std::min(value, value_max), value_min_); - } - T value_min_; -}; - -// Binary functor for clip -template -struct BinaryClipMaxOp { - BinaryClipMaxOp(const T& value_max) : value_max_(value_max) {} - const T operator()(const T& value, const T& value_min) const { - return std::max(std::min(value, value_max_), value_min); - } - T value_max_; -}; +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; // Basic coefficient-wise tenary operations. // This is the case for example of the clip_by_value. 
@@ -76,7 +45,8 @@ class TenaryOp : public OpKernel { auto out_flat = out->flat(); if (in1.shape() == in2.shape()) { if (in0.shape() == in1.shape()) { - out_flat = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat); + functor::TernaryClipOp()(d, in0_flat, in1_flat, in2_flat, + out_flat); } else { OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(in1.shape()), errors::InvalidArgument( @@ -85,7 +55,8 @@ class TenaryOp : public OpKernel { "input shape: ", in0.shape().DebugString(), "clip_value_min shape: ", in1.shape().DebugString(), "clip_value_max shape: ", in2.shape().DebugString())); - out_flat = in0_flat.unaryExpr(UnaryClipOp(in1_flat(0), in2_flat(0))); + functor::UnaryClipOp()(d, in0_flat, in1_flat, in2_flat, + out_flat); } } else { if (in0.shape() == in1.shape()) { @@ -96,9 +67,8 @@ class TenaryOp : public OpKernel { "input shape: ", in0.shape().DebugString(), "clip_value_min shape: ", in1.shape().DebugString(), "clip_value_max shape: ", in2.shape().DebugString())); - out_flat = - in0_flat.binaryExpr(in1_flat, BinaryClipMaxOp(in2_flat(0))); - + functor::BinaryLeftClipOp()(d, in0_flat, in1_flat, in2_flat, + out_flat); } else { OP_REQUIRES(ctx, (in0.shape() == in2.shape() && TensorShapeUtils::IsScalar(in1.shape())), @@ -108,13 +78,103 @@ class TenaryOp : public OpKernel { "input shape: ", in0.shape().DebugString(), "clip_value_min shape: ", in1.shape().DebugString(), "clip_value_max shape: ", in2.shape().DebugString())); - out_flat = - in0_flat.binaryExpr(in2_flat, BinaryClipMinOp(in1_flat(0))); + functor::BinaryRightClipOp()(d, in0_flat, in1_flat, in2_flat, + out_flat); } } } }; +namespace functor { +// Unary functor for clip [Tensor, Scalar, Scalar] +template +struct UnaryClipFunc { + UnaryClipFunc(const T& value_min, const T& value_max) + : value_min_(value_min), value_max_(value_max) {} + const T operator()(const T& value) const { + return std::max(std::min(value, value_max_), value_min_); + } + T value_min_; + T value_max_; +}; +template +struct UnaryClipOp { + void 
operator()(const CPUDevice& d, typename TTypes::ConstFlat& in0_flat, + typename TTypes::ConstFlat& in1_flat, + typename TTypes::ConstFlat& in2_flat, + typename TTypes::Flat& out_flat) const { + out_flat = in0_flat.unaryExpr(UnaryClipFunc(in1_flat(0), in2_flat(0))); + } +}; + +// Binary functor for clip [Tensor, Scalar, Tensor] +template +struct BinaryRightClipFunc { + BinaryRightClipFunc(const T& value_min) : value_min_(value_min) {} + const T operator()(const T& value, const T& value_max) const { + return std::max(std::min(value, value_max), value_min_); + } + T value_min_; +}; +template +struct BinaryRightClipOp { + void operator()(const CPUDevice& d, typename TTypes::ConstFlat& in0_flat, + typename TTypes::ConstFlat& in1_flat, + typename TTypes::ConstFlat& in2_flat, + typename TTypes::Flat& out_flat) const { + out_flat = + in0_flat.binaryExpr(in2_flat, BinaryRightClipFunc(in1_flat(0))); + } +}; + +// Binary functor for clip [Tensor, Tensor, Scalar] +template +struct BinaryLeftClipFunc { + BinaryLeftClipFunc(const T& value_max) : value_max_(value_max) {} + const T operator()(const T& value, const T& value_min) const { + return std::max(std::min(value, value_max_), value_min); + } + T value_max_; +}; +template +struct BinaryLeftClipOp { + void operator()(const CPUDevice& d, typename TTypes::ConstFlat& in0_flat, + typename TTypes::ConstFlat& in1_flat, + typename TTypes::ConstFlat& in2_flat, + typename TTypes::Flat& out_flat) const { + out_flat = + in0_flat.binaryExpr(in1_flat, BinaryLeftClipFunc(in2_flat(0))); + } +}; + +// Ternary functor for clip [Tensor, Tensor, Tensor] +template +struct TernaryClipOp { + void operator()(const CPUDevice& d, typename TTypes::ConstFlat& in0_flat, + typename TTypes::ConstFlat& in1_flat, + typename TTypes::ConstFlat& in2_flat, + typename TTypes::Flat& out_flat) const { + out_flat.device(d) = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat); + } +}; + +#define INSTANTIATE_CPU(T) \ + template struct UnaryClipOp; \ + template struct 
BinaryRightClipOp; \ + template struct BinaryLeftClipOp; \ + template struct TernaryClipOp; +INSTANTIATE_CPU(Eigen::half); +INSTANTIATE_CPU(float); +INSTANTIATE_CPU(double); +INSTANTIATE_CPU(int8); +INSTANTIATE_CPU(int16); +INSTANTIATE_CPU(int32); +INSTANTIATE_CPU(int64); +INSTANTIATE_CPU(uint8); +INSTANTIATE_CPU(uint16); +#undef INSTANTIATE_CPU +} // namespace functor + #define REGISTER_CPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ Name("ClipByValue").Device(DEVICE_CPU).TypeConstraint("T"), \ @@ -129,11 +189,22 @@ REGISTER_CPU_KERNEL(int32); REGISTER_CPU_KERNEL(int64); REGISTER_CPU_KERNEL(uint8); REGISTER_CPU_KERNEL(uint16); - #undef REGISTER_CPU_KERNEL #if GOOGLE_CUDA -// REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double); + +#define REGISTER_GPU_KERNEL(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("ClipByValue").Device(DEVICE_GPU).TypeConstraint("T"), \ + TenaryOp); +REGISTER_GPU_KERNEL(Eigen::half); +REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(double); +REGISTER_GPU_KERNEL(int8); +REGISTER_GPU_KERNEL(int16); +REGISTER_GPU_KERNEL(int64); +REGISTER_GPU_KERNEL(uint8); +REGISTER_GPU_KERNEL(uint16); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel @@ -142,9 +213,12 @@ REGISTER_KERNEL_BUILDER(Name("ClipByValue") .Device(DEVICE_GPU) .HostMemory("t") .HostMemory("clip_value_min") - .HostMemory("clip_value_min") + .HostMemory("clip_value_max") + .HostMemory("output") .TypeConstraint("T"), TenaryOp); + +#undef REGISTER_GPU_KERNEL #endif } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_clip.h b/tensorflow/core/kernels/cwise_op_clip.h new file mode 100644 index 0000000000..1a4bf8cf1d --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_clip.h @@ -0,0 +1,61 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_ +#define TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_ + +#include "tensorflow/core/kernels/cwise_ops_common.h" + +namespace tensorflow { +namespace functor { +// Unary functor for clip [Tensor, Scalar, Scalar] +template +struct UnaryClipOp { + void operator()(const Device &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const; +}; + +// Binary functor for clip [Tensor, Scalar, Tensor] +template +struct BinaryRightClipOp { + void operator()(const Device &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const; +}; + +// Binary functor for clip [Tensor, Tensor, Scalar] +template +struct BinaryLeftClipOp { + void operator()(const Device &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const; +}; + +// Ternary functor for clip [Tensor, Tensor, Tensor] +template +struct TernaryClipOp { + void operator()(const Device &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const; +}; +} +} // namespace tensorflow + +#endif // TENSORFLOW_KERNELS_CWISE_OP_CLIP_H_ diff --git a/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc 
b/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc new file mode 100644 index 0000000000..5c07847548 --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_clip_gpu.cu.cc @@ -0,0 +1,134 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "tensorflow/core/kernels/cwise_op_clip.h" +#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { + +template +__global__ void UnaryClipCustomKernel(const int32 size_in, const T *in0, + const T *in1, const T *in2, T *out) { + CUDA_1D_KERNEL_LOOP(i, size_in) { + T value = in2[0] < in0[i] ? in2[0] : in0[i]; + out[i] = value < in1[0] ? in1[0] : value; + } +} + +template +__global__ void BinaryRightClipCustomKernel(const int32 size_in, const T *in0, + const T *in1, const T *in2, + T *out) { + CUDA_1D_KERNEL_LOOP(i, size_in) { + T value = in2[i] < in0[i] ? in2[i] : in0[i]; + out[i] = value < in1[0] ? in1[0] : value; + } +} + +template +__global__ void BinaryLeftClipCustomKernel(const int32 size_in, const T *in0, + const T *in1, const T *in2, T *out) { + CUDA_1D_KERNEL_LOOP(i, size_in) { + T value = in2[0] < in0[i] ? in2[0] : in0[i]; + out[i] = value < in1[i] ? 
in1[i] : value; + } +} + +namespace functor { + +// Unary functor for clip [Tensor, Scalar, Scalar] +template +struct UnaryClipOp { + void operator()(const GPUDevice &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const { + CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d); + + UnaryClipCustomKernel< + T><<>>( + in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(), + out_flat.data()); + } +}; + +// Binary functor for clip [Tensor, Scalar, Tensor] +template +struct BinaryRightClipOp { + void operator()(const GPUDevice &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const { + CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d); + + BinaryRightClipCustomKernel< + T><<>>( + in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(), + out_flat.data()); + } +}; + +// Binary functor for clip [Tensor, Tensor, Scalar] +template +struct BinaryLeftClipOp { + void operator()(const GPUDevice &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const { + CudaLaunchConfig config = GetCudaLaunchConfig(in0_flat.size(), d); + + BinaryLeftClipCustomKernel< + T><<>>( + in0_flat.size(), in0_flat.data(), in1_flat.data(), in2_flat.data(), + out_flat.data()); + } +}; + +// Ternary functor for clip [Tensor, Tensor, Tensor] +template +struct TernaryClipOp { + void operator()(const GPUDevice &d, typename TTypes::ConstFlat &in0_flat, + typename TTypes::ConstFlat &in1_flat, + typename TTypes::ConstFlat &in2_flat, + typename TTypes::Flat &out_flat) const { + out_flat.device(d) = in0_flat.cwiseMin(in2_flat).cwiseMax(in1_flat); + } +}; + +#define INSTANTIATE_GPU(T) \ + template struct UnaryClipOp; \ + template struct 
BinaryRightClipOp; \ + template struct BinaryLeftClipOp; \ + template struct TernaryClipOp; +INSTANTIATE_GPU(Eigen::half); +INSTANTIATE_GPU(float); +INSTANTIATE_GPU(double); +INSTANTIATE_GPU(int8); +INSTANTIATE_GPU(int16); +INSTANTIATE_GPU(int32); +INSTANTIATE_GPU(int64); +INSTANTIATE_GPU(uint8); +INSTANTIATE_GPU(uint16); +#undef INSTANTIATE_GPU + +} // namespace functor +} // namespace tensorflow + +#endif // GOOGLE_CUDA -- GitLab From a3553d45b63fba1cd4eb8d1d5b6dd0d565c94879 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 26 Oct 2017 04:38:38 +0000 Subject: [PATCH 0060/1931] Update test cases for tf.clip_by_value Signed-off-by: Yong Tang --- .../python/kernel_tests/clip_ops_test.py | 105 ++++++++++++++---- 1 file changed, 85 insertions(+), 20 deletions(-) diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index d47930350e..2d03fb99e4 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import clip_ops @@ -29,7 +30,7 @@ class ClipTest(test.TestCase): # ClipByValue test def testClipByValue(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3]) np_ans = [[-4.4, 2.0, 3.0], [4.0, 4.4, 4.4]] clip_value = 4.4 @@ -38,8 +39,72 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans) + # [Tensor, Scalar, Scalar] + def testClipByValue0Type(self): + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, + dtypes.uint8, dtypes.uint16]: + with self.test_session(use_gpu=True): + x = 
constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[2, 2, 3], [4, 4, 4]] + clip_value_min = 2 + clip_value_max = 4 + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + + # [Tensor, Tensor, Scalar] + def testClipByValue1Type(self): + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, + dtypes.uint8, dtypes.uint16]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[2, 2, 3], [4, 4, 4]] + clip_value_min = constant_op.constant([2, 2, 2, 3, 3, 3], shape=[2, 3], + dtype=dtype) + clip_value_max = 4 + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + + # [Tensor, Scalar, Tensor] + def testClipByValue2Type(self): + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, + dtypes.uint8, dtypes.uint16]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[4, 4, 4], [4, 5, 6]] + clip_value_min = 4 + clip_value_max = constant_op.constant([6, 6, 6, 6, 6, 6], shape=[2, 3], + dtype=dtype) + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + + # [Tensor, Tensor, Tensor] + def testClipByValue3Type(self): + for dtype in [dtypes.float16, dtypes.float32, dtypes.float64, + dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, + dtypes.uint8, dtypes.uint16]: + with self.test_session(use_gpu=True): + x = constant_op.constant([1, 2, 3, 4, 5, 6], shape=[2, 3], dtype=dtype) + np_ans = [[2, 2, 3], [5, 5, 6]] + clip_value_min = constant_op.constant([2, 2, 2, 5, 5, 5], shape=[2, 3], + dtype=dtype) + clip_value_max = constant_op.constant([5, 5, 5, 7, 7, 7], 
shape=[2, 3], + dtype=dtype) + ans = clip_ops.clip_by_value(x, clip_value_min, clip_value_max) + tf_ans = ans.eval() + + self.assertAllClose(np_ans, tf_ans) + def testClipByValueBadShape(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-5.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3, 1]) # Use a nonsensical shape. clip = constant_op.constant([1.0, 2.0]) @@ -51,7 +116,7 @@ class ClipTest(test.TestCase): tf_ans = ans.eval() def testClipByValueNonFinite(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([float('NaN'), float('Inf'), -float('Inf')]) np_ans = [float('NaN'), 4.0, -4.0] clip_value = 4.0 @@ -63,7 +128,7 @@ class ClipTest(test.TestCase): # ClipByNorm tests def testClipByNormClipped(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Norm of x = sqrt(3^2 + 4^2) = 5 np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 0.0]] @@ -79,7 +144,7 @@ class ClipTest(test.TestCase): self.assertAllClose(np_ans, tf_ans_tensor) def testClipByNormBadShape(self): - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3, 1]) # Use a nonsensical shape. 
clip = constant_op.constant([1.0, 2.0]) @@ -88,7 +153,7 @@ class ClipTest(test.TestCase): def testClipByNormNotClipped(self): # No norm clipping when clip_norm >= 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Norm of x = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]] @@ -100,7 +165,7 @@ class ClipTest(test.TestCase): def testClipByNormZero(self): # No norm clipping when norm = 0 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) # Norm = 0, no changes np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] @@ -112,7 +177,7 @@ class ClipTest(test.TestCase): def testClipByNormClippedWithDim0(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[:, 0] = sqrt(3^2 + 4^2) = 5, x[:, 2] = 3 np_ans = [[-2.4, 0.0, 0.0], [3.2, 0.0, 3.0]] @@ -124,7 +189,7 @@ class ClipTest(test.TestCase): def testClipByNormClippedWithDim1(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [3.2, 0.0, 2.4]] @@ -136,7 +201,7 @@ class ClipTest(test.TestCase): def testClipByNormNotClippedWithAxes(self): # No norm clipping when clip_norm >= 5 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 3.0], shape=[2, 3]) # Norm of x[0, :] = 3, x[1, :] = sqrt(3^2 + 4^2) = 5 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 3.0]] @@ -149,7 +214,7 @@ class ClipTest(test.TestCase): # ClipByGlobalNorm tests def testClipByGlobalNormClipped(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with 
self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -170,7 +235,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormClippedTensor(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -191,7 +256,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormSupportsNone(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -214,7 +279,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormWithIndexedSlicesClipped(self): # Norm clipping when clip_norm < 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = ops.IndexedSlices( constant_op.constant([1.0, -2.0]), constant_op.constant([3, 4])) @@ -247,7 +312,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormNotClipped(self): # No norm clipping when clip_norm >= 5 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([-2.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([1.0, -2.0]) # Global norm of x0 and x1 = sqrt(1 + 4^2 + 2^2 + 2^2) = 5 @@ -266,7 +331,7 @@ class ClipTest(test.TestCase): def testClipByGlobalNormZero(self): # No norm clipping when norm = 0 - with self.test_session(): + with self.test_session(use_gpu=True): x0 = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) x1 = constant_op.constant([0.0, 0.0]) # Norm = 0, no 
changes @@ -285,7 +350,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormClipped(self): # Norm clipping when average clip_norm < 0.83333333 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333 np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]] @@ -297,7 +362,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormClippedTensor(self): # Norm clipping when average clip_norm < 0.83333333 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333 np_ans = [[-2.88, 0.0, 0.0], [3.84, 0.0, 0.0]] @@ -309,7 +374,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormNotClipped(self): # No norm clipping when average clip_norm >= 0.83333333 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([-3.0, 0.0, 0.0, 4.0, 0.0, 0.0], shape=[2, 3]) # Average norm of x = sqrt(3^2 + 4^2) / 6 = 0.83333333 np_ans = [[-3.0, 0.0, 0.0], [4.0, 0.0, 0.0]] @@ -321,7 +386,7 @@ class ClipTest(test.TestCase): def testClipByAverageNormZero(self): # No norm clipping when average clip_norm = 0 - with self.test_session(): + with self.test_session(use_gpu=True): x = constant_op.constant([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], shape=[2, 3]) # Average norm = 0, no changes np_ans = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] -- GitLab From a5e9d9a387680b0b1d7d8ed08fc9c07477a7efe7 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 30 Oct 2017 23:42:08 +0000 Subject: [PATCH 0061/1931] Add grad registration for clip_by_value and address review feedbacks. 
Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_op_clip.cc | 2 +- .../python/kernel_tests/clip_ops_test.py | 16 ++++++++++++ tensorflow/python/ops/clip_ops.py | 25 +++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index c2980acdd8..f30c49fdf8 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tensorflow/python/kernel_tests/clip_ops_test.py b/tensorflow/python/kernel_tests/clip_ops_test.py index 2d03fb99e4..cb1359be15 100644 --- a/tensorflow/python/kernel_tests/clip_ops_test.py +++ b/tensorflow/python/kernel_tests/clip_ops_test.py @@ -23,11 +23,27 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import gradient_checker from tensorflow.python.platform import test class ClipTest(test.TestCase): + def testClipByValueGradient(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], dtype=dtypes.float32) + outputs_1 = clip_ops.clip_by_value(inputs, 0.5, 3.5) + min_val = constant_op.constant([0.5, 0.5, 0.5, 0.5], dtype=dtypes.float32) + max_val = constant_op.constant([3.5, 3.5, 3.5, 3.5], dtype=dtypes.float32) + outputs_2 = clip_ops.clip_by_value(inputs, min_val, max_val) + with self.test_session(): + error_1 = gradient_checker.compute_gradient_error(inputs, [4], + outputs_1, [4]) + self.assertLess(error_1, 1e-4) + + error_2 = gradient_checker.compute_gradient_error(inputs, [4], + outputs_2, [4]) + self.assertLess(error_2, 1e-4) + # ClipByValue test 
def testClipByValue(self): with self.test_session(use_gpu=True): diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py index a5baebb3f6..e84cfc6944 100644 --- a/tensorflow/python/ops/clip_ops.py +++ b/tensorflow/python/ops/clip_ops.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops @@ -64,6 +65,30 @@ def clip_by_value(t, clip_value_min, clip_value_max, clip_value_max, name=name) +@ops.RegisterGradient("ClipByValue") +def _ClipByValueGrad(op, grad): + """Returns grad of clip_by_value.""" + x = op.inputs[0] + y = op.inputs[1] + z = op.inputs[2] + gdtype = grad.dtype + sx = array_ops.shape(x) + sy = array_ops.shape(y) + sz = array_ops.shape(z) + gradshape = array_ops.shape(grad) + zeros = array_ops.zeros(gradshape, gdtype) + xymask = math_ops.less(x, y) + xzmask = math_ops.greater(x, z) + rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) + rx, rz = gen_array_ops._broadcast_gradient_args(sx, sz) + xgrad = array_ops.where(math_ops.logical_or(xymask, xzmask), zeros, grad) + ygrad = array_ops.where(xymask, grad, zeros) + zgrad = array_ops.where(xzmask, grad, zeros) + gx = array_ops.reshape(math_ops.reduce_sum(xgrad, rx), sx) + gy = array_ops.reshape(math_ops.reduce_sum(ygrad, ry), sy) + gz = array_ops.reshape(math_ops.reduce_sum(zgrad, rz), sz) + return (gx, gy, gz) + @tf_export("clip_by_norm") def clip_by_norm(t, clip_norm, axes=None, name=None): -- GitLab From 71ddf90d3c8c49d4401c0d298bf63b92150dadaa Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 14 Dec 2017 04:06:58 +0000 Subject: [PATCH 0062/1931] Update with `TenaryOp` -> `ClipOp` Signed-off-by: Yong Tang --- 
tensorflow/core/kernels/cwise_op_clip.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_clip.cc b/tensorflow/core/kernels/cwise_op_clip.cc index f30c49fdf8..bd22f5777c 100644 --- a/tensorflow/core/kernels/cwise_op_clip.cc +++ b/tensorflow/core/kernels/cwise_op_clip.cc @@ -25,9 +25,9 @@ typedef Eigen::GpuDevice GPUDevice; // Device: E.g., CPUDevice, GPUDevice. // Functor: defined above. E.g., functor::clip. template -class TenaryOp : public OpKernel { +class ClipOp : public OpKernel { public: - explicit TenaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + explicit ClipOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { const Tensor& in0 = ctx->input(0); @@ -178,7 +178,7 @@ INSTANTIATE_CPU(uint16); #define REGISTER_CPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ Name("ClipByValue").Device(DEVICE_CPU).TypeConstraint("T"), \ - TenaryOp); + ClipOp); REGISTER_CPU_KERNEL(Eigen::half); REGISTER_CPU_KERNEL(float); @@ -196,7 +196,7 @@ REGISTER_CPU_KERNEL(uint16); #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER( \ Name("ClipByValue").Device(DEVICE_GPU).TypeConstraint("T"), \ - TenaryOp); + ClipOp); REGISTER_GPU_KERNEL(Eigen::half); REGISTER_GPU_KERNEL(float); REGISTER_GPU_KERNEL(double); @@ -216,7 +216,7 @@ REGISTER_KERNEL_BUILDER(Name("ClipByValue") .HostMemory("clip_value_max") .HostMemory("output") .TypeConstraint("T"), - TenaryOp); + ClipOp); #undef REGISTER_GPU_KERNEL #endif -- GitLab From d1078b562532e2de60bc16fc544a94823149ae77 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 18 Dec 2017 17:42:37 +0000 Subject: [PATCH 0063/1931] Fix failing test //tensorflow/python:function_test Signed-off-by: Yong Tang --- tensorflow/python/framework/function_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 65ca801cbe..24aaff3748 100644 --- 
a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -1333,7 +1333,7 @@ class UnrollLSTMTest(test.TestCase): value=math_ops.matmul(xm, weights), num_or_size_splits=4, axis=1) new_c = math_ops.sigmoid(f_g) * cprev + math_ops.sigmoid( i_g) * math_ops.tanh(i_i) - new_c = clip_ops.clip_by_value(new_c, -50.0, 50.0) + new_c = math_ops.maximum(math_ops.minimum(new_c, 50.0), -50.0) new_m = math_ops.sigmoid(o_g) * math_ops.tanh(new_c) return new_m, new_c -- GitLab From 14e9c14ecdb9e9ddb283c5ec9cf27b3c5dbb900e Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 18 Dec 2017 18:58:42 +0000 Subject: [PATCH 0064/1931] Fix api_compatibility_test with `--update_goldens True` Signed-off-by: Yong Tang --- .../base_api/api_def_ClipByValue.pbtxt | 36 +++++++++++++++++++ .../python_api/api_def_ClipByValue.pbtxt | 4 +++ 2 files changed, 40 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ClipByValue.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt b/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt new file mode 100644 index 0000000000..803d8970ab --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ClipByValue.pbtxt @@ -0,0 +1,36 @@ +op { + graph_op_name: "ClipByValue" + in_arg { + name: "t" + description: <